Simplify query transformations
This commit is contained in:
parent
5c0434c73f
commit
58943b5e08
|
@ -1,11 +1,14 @@
|
||||||
package it.cavallium.dbengine.client;
|
package it.cavallium.dbengine.client;
|
||||||
|
|
||||||
|
import com.google.common.collect.Multimap;
|
||||||
|
import com.google.common.collect.Multimaps;
|
||||||
import it.cavallium.dbengine.database.LLIndexRequest;
|
import it.cavallium.dbengine.database.LLIndexRequest;
|
||||||
import it.cavallium.dbengine.database.LLSoftUpdateDocument;
|
import it.cavallium.dbengine.database.LLSoftUpdateDocument;
|
||||||
import it.cavallium.dbengine.database.LLUpdateDocument;
|
import it.cavallium.dbengine.database.LLUpdateDocument;
|
||||||
import it.cavallium.dbengine.database.LLTerm;
|
import it.cavallium.dbengine.database.LLTerm;
|
||||||
import it.cavallium.dbengine.database.LLUpdateFields;
|
import it.cavallium.dbengine.database.LLUpdateFields;
|
||||||
import it.cavallium.dbengine.database.LLUtils;
|
import it.cavallium.dbengine.database.LLUtils;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import reactor.core.publisher.Flux;
|
import reactor.core.publisher.Flux;
|
||||||
|
@ -43,7 +46,7 @@ public abstract class Indicizer<T, U> {
|
||||||
|
|
||||||
public abstract IndicizerSimilarities getPerFieldSimilarity();
|
public abstract IndicizerSimilarities getPerFieldSimilarity();
|
||||||
|
|
||||||
public Flux<Tuple2<String, Set<String>>> getMoreLikeThisDocumentFields(T key, U value) {
|
public Multimap<String, String> getMoreLikeThisDocumentFields(T key, U value) {
|
||||||
return Flux.empty();
|
return Multimaps.forMap(Map.of());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -90,7 +90,7 @@ public class LuceneIndexImpl<T, U> implements LuceneIndex<T, U> {
|
||||||
public Mono<Hits<HitKey<T>>> moreLikeThis(ClientQueryParams queryParams,
|
public Mono<Hits<HitKey<T>>> moreLikeThis(ClientQueryParams queryParams,
|
||||||
T key,
|
T key,
|
||||||
U mltDocumentValue) {
|
U mltDocumentValue) {
|
||||||
Flux<Tuple2<String, Set<String>>> mltDocumentFields
|
var mltDocumentFields
|
||||||
= indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue);
|
= indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue);
|
||||||
|
|
||||||
return luceneIndex
|
return luceneIndex
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
package it.cavallium.dbengine.database;
|
package it.cavallium.dbengine.database;
|
||||||
|
|
||||||
|
import com.google.common.collect.Multimap;
|
||||||
import io.net5.buffer.api.Resource;
|
import io.net5.buffer.api.Resource;
|
||||||
import io.net5.buffer.api.Send;
|
import io.net5.buffer.api.Send;
|
||||||
import it.cavallium.data.generator.nativedata.Nullablefloat;
|
import it.cavallium.data.generator.nativedata.Nullablefloat;
|
||||||
|
@ -51,7 +52,7 @@ public interface LLLuceneIndex extends LLSnapshottable {
|
||||||
Mono<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
|
Mono<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
|
||||||
QueryParams queryParams,
|
QueryParams queryParams,
|
||||||
String keyFieldName,
|
String keyFieldName,
|
||||||
Flux<Tuple2<String, Set<String>>> mltDocumentFields);
|
Multimap<String, String> mltDocumentFields);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of elements
|
* @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of elements
|
||||||
|
|
|
@ -6,10 +6,10 @@ import static it.cavallium.dbengine.database.LLUtils.toDocument;
|
||||||
import static it.cavallium.dbengine.database.LLUtils.toFields;
|
import static it.cavallium.dbengine.database.LLUtils.toFields;
|
||||||
import static it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.NO_TRANSFORMATION;
|
import static it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.NO_TRANSFORMATION;
|
||||||
|
|
||||||
|
import com.google.common.collect.Multimap;
|
||||||
import io.micrometer.core.instrument.Counter;
|
import io.micrometer.core.instrument.Counter;
|
||||||
import io.micrometer.core.instrument.MeterRegistry;
|
import io.micrometer.core.instrument.MeterRegistry;
|
||||||
import io.micrometer.core.instrument.Timer;
|
import io.micrometer.core.instrument.Timer;
|
||||||
import io.net5.buffer.api.Resource;
|
|
||||||
import io.net5.buffer.api.Send;
|
import io.net5.buffer.api.Send;
|
||||||
import it.cavallium.dbengine.client.DirectIOOptions;
|
import it.cavallium.dbengine.client.DirectIOOptions;
|
||||||
import it.cavallium.dbengine.client.IndicizerAnalyzers;
|
import it.cavallium.dbengine.client.IndicizerAnalyzers;
|
||||||
|
@ -32,10 +32,10 @@ import it.cavallium.dbengine.lucene.AlwaysDirectIOFSDirectory;
|
||||||
import it.cavallium.dbengine.lucene.LuceneHacks;
|
import it.cavallium.dbengine.lucene.LuceneHacks;
|
||||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||||
import it.cavallium.dbengine.lucene.collector.Buckets;
|
import it.cavallium.dbengine.lucene.collector.Buckets;
|
||||||
|
import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer;
|
||||||
import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher;
|
import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher;
|
||||||
import it.cavallium.dbengine.lucene.searcher.BucketParams;
|
import it.cavallium.dbengine.lucene.searcher.BucketParams;
|
||||||
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
|
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
|
||||||
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer;
|
|
||||||
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
|
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
|
||||||
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
|
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -45,7 +45,6 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
import java.util.concurrent.Phaser;
|
import java.util.concurrent.Phaser;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
@ -80,7 +79,6 @@ import reactor.core.publisher.Flux;
|
||||||
import reactor.core.publisher.Mono;
|
import reactor.core.publisher.Mono;
|
||||||
import reactor.core.scheduler.Scheduler;
|
import reactor.core.scheduler.Scheduler;
|
||||||
import reactor.core.scheduler.Schedulers;
|
import reactor.core.scheduler.Schedulers;
|
||||||
import reactor.util.function.Tuple2;
|
|
||||||
|
|
||||||
public class LLLocalLuceneIndex implements LLLuceneIndex {
|
public class LLLocalLuceneIndex implements LLLuceneIndex {
|
||||||
|
|
||||||
|
@ -442,10 +440,10 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
|
||||||
public Mono<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
|
public Mono<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
|
||||||
QueryParams queryParams,
|
QueryParams queryParams,
|
||||||
String keyFieldName,
|
String keyFieldName,
|
||||||
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux) {
|
Multimap<String, String> mltDocumentFieldsFlux) {
|
||||||
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
|
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
|
||||||
var searcher = this.searcherManager.retrieveSearcher(snapshot);
|
var searcher = this.searcherManager.retrieveSearcher(snapshot);
|
||||||
var transformer = new MoreLikeThisTransformer(mltDocumentFieldsFlux);
|
var transformer = new MoreLikeThisTransformer(mltDocumentFieldsFlux, luceneAnalyzer, luceneSimilarity);
|
||||||
|
|
||||||
return localSearcher
|
return localSearcher
|
||||||
.collect(searcher, localQueryParams, keyFieldName, transformer)
|
.collect(searcher, localQueryParams, keyFieldName, transformer)
|
||||||
|
@ -601,18 +599,4 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
|
||||||
return lowMemory;
|
return lowMemory;
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MoreLikeThisTransformer implements LLSearchTransformer {
|
|
||||||
|
|
||||||
private final Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux;
|
|
||||||
|
|
||||||
public MoreLikeThisTransformer(Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux) {
|
|
||||||
this.mltDocumentFieldsFlux = mltDocumentFieldsFlux;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Mono<LocalQueryParams> transform(Mono<TransformerInput> inputMono) {
|
|
||||||
return inputMono.flatMap(input -> LuceneUtils.getMoreLikeThisQuery(input.indexSearchers(), input.queryParams(),
|
|
||||||
luceneAnalyzer, luceneSimilarity, mltDocumentFieldsFlux));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,8 +2,8 @@ package it.cavallium.dbengine.database.disk;
|
||||||
|
|
||||||
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
|
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
|
||||||
|
|
||||||
|
import com.google.common.collect.Multimap;
|
||||||
import io.micrometer.core.instrument.MeterRegistry;
|
import io.micrometer.core.instrument.MeterRegistry;
|
||||||
import io.net5.buffer.api.Resource;
|
|
||||||
import io.net5.buffer.api.Send;
|
import io.net5.buffer.api.Send;
|
||||||
import it.cavallium.dbengine.client.IndicizerAnalyzers;
|
import it.cavallium.dbengine.client.IndicizerAnalyzers;
|
||||||
import it.cavallium.dbengine.client.IndicizerSimilarities;
|
import it.cavallium.dbengine.client.IndicizerSimilarities;
|
||||||
|
@ -12,23 +12,22 @@ import it.cavallium.dbengine.client.query.QueryParser;
|
||||||
import it.cavallium.dbengine.client.query.current.data.Query;
|
import it.cavallium.dbengine.client.query.current.data.Query;
|
||||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
||||||
import it.cavallium.dbengine.database.LLIndexRequest;
|
import it.cavallium.dbengine.database.LLIndexRequest;
|
||||||
import it.cavallium.dbengine.database.LLUpdateDocument;
|
|
||||||
import it.cavallium.dbengine.database.LLItem;
|
|
||||||
import it.cavallium.dbengine.database.LLLuceneIndex;
|
import it.cavallium.dbengine.database.LLLuceneIndex;
|
||||||
import it.cavallium.dbengine.database.LLSearchResultShard;
|
import it.cavallium.dbengine.database.LLSearchResultShard;
|
||||||
import it.cavallium.dbengine.database.LLSnapshot;
|
import it.cavallium.dbengine.database.LLSnapshot;
|
||||||
import it.cavallium.dbengine.database.LLTerm;
|
import it.cavallium.dbengine.database.LLTerm;
|
||||||
|
import it.cavallium.dbengine.database.LLUpdateDocument;
|
||||||
import it.cavallium.dbengine.database.LLUtils;
|
import it.cavallium.dbengine.database.LLUtils;
|
||||||
import it.cavallium.dbengine.lucene.LuceneHacks;
|
import it.cavallium.dbengine.lucene.LuceneHacks;
|
||||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||||
import it.cavallium.dbengine.lucene.collector.Buckets;
|
import it.cavallium.dbengine.lucene.collector.Buckets;
|
||||||
|
import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer;
|
||||||
import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher;
|
import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher;
|
||||||
import it.cavallium.dbengine.lucene.searcher.BucketParams;
|
import it.cavallium.dbengine.lucene.searcher.BucketParams;
|
||||||
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
|
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
|
||||||
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer;
|
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer;
|
||||||
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
|
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
|
||||||
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
|
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
|
||||||
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
@ -39,7 +38,6 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
||||||
|
@ -49,7 +47,6 @@ import org.jetbrains.annotations.Nullable;
|
||||||
import reactor.core.publisher.Flux;
|
import reactor.core.publisher.Flux;
|
||||||
import reactor.core.publisher.Mono;
|
import reactor.core.publisher.Mono;
|
||||||
import reactor.core.scheduler.Schedulers;
|
import reactor.core.scheduler.Schedulers;
|
||||||
import reactor.util.function.Tuple2;
|
|
||||||
|
|
||||||
public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
|
public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
|
||||||
|
|
||||||
|
@ -233,10 +230,10 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
|
||||||
public Mono<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
|
public Mono<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
|
||||||
QueryParams queryParams,
|
QueryParams queryParams,
|
||||||
String keyFieldName,
|
String keyFieldName,
|
||||||
Flux<Tuple2<String, Set<String>>> mltDocumentFields) {
|
Multimap<String, String> mltDocumentFields) {
|
||||||
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
|
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
|
||||||
var searchers = this.getIndexSearchers(snapshot);
|
var searchers = this.getIndexSearchers(snapshot);
|
||||||
var transformer = new MultiMoreLikeThisTransformer(mltDocumentFields);
|
var transformer = new MoreLikeThisTransformer(mltDocumentFields, luceneAnalyzer, luceneSimilarity);
|
||||||
|
|
||||||
// Collect all the shards results into a single global result
|
// Collect all the shards results into a single global result
|
||||||
return multiSearcher
|
return multiSearcher
|
||||||
|
@ -340,19 +337,4 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
|
||||||
public boolean isLowMemoryMode() {
|
public boolean isLowMemoryMode() {
|
||||||
return luceneIndices[0].isLowMemoryMode();
|
return luceneIndices[0].isLowMemoryMode();
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MultiMoreLikeThisTransformer implements LLSearchTransformer {
|
|
||||||
|
|
||||||
private final Flux<Tuple2<String, Set<String>>> mltDocumentFields;
|
|
||||||
|
|
||||||
public MultiMoreLikeThisTransformer(Flux<Tuple2<String, Set<String>>> mltDocumentFields) {
|
|
||||||
this.mltDocumentFields = mltDocumentFields;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Mono<LocalQueryParams> transform(Mono<TransformerInput> inputMono) {
|
|
||||||
return inputMono.flatMap(input -> LuceneUtils.getMoreLikeThisQuery(input.indexSearchers(), input.queryParams(),
|
|
||||||
luceneAnalyzer, luceneSimilarity, mltDocumentFields));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,8 @@ package it.cavallium.dbengine.lucene;
|
||||||
|
|
||||||
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
|
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
|
||||||
|
|
||||||
|
import com.google.common.collect.HashMultimap;
|
||||||
|
import com.google.common.collect.Multimap;
|
||||||
import it.cavallium.dbengine.client.CompositeSnapshot;
|
import it.cavallium.dbengine.client.CompositeSnapshot;
|
||||||
import it.cavallium.dbengine.client.IndicizerAnalyzers;
|
import it.cavallium.dbengine.client.IndicizerAnalyzers;
|
||||||
import it.cavallium.dbengine.client.IndicizerSimilarities;
|
import it.cavallium.dbengine.client.IndicizerSimilarities;
|
||||||
|
@ -31,12 +33,12 @@ import java.nio.ByteBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
|
@ -49,6 +51,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexableField;
|
import org.apache.lucene.index.IndexableField;
|
||||||
|
import org.apache.lucene.queries.mlt.MoreLikeThisQuery;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery.Builder;
|
import org.apache.lucene.search.BooleanQuery.Builder;
|
||||||
import org.apache.lucene.search.Collector;
|
import org.apache.lucene.search.Collector;
|
||||||
|
@ -79,7 +82,6 @@ import reactor.core.publisher.Flux;
|
||||||
import reactor.core.publisher.Mono;
|
import reactor.core.publisher.Mono;
|
||||||
import reactor.core.scheduler.Schedulers;
|
import reactor.core.scheduler.Schedulers;
|
||||||
import reactor.util.concurrent.Queues;
|
import reactor.util.concurrent.Queues;
|
||||||
import reactor.util.function.Tuple2;
|
|
||||||
|
|
||||||
public class LuceneUtils {
|
public class LuceneUtils {
|
||||||
|
|
||||||
|
@ -470,37 +472,25 @@ public class LuceneUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Mono<LocalQueryParams> getMoreLikeThisQuery(
|
public static Query getMoreLikeThisQuery(LLIndexSearchers inputIndexSearchers,
|
||||||
LLIndexSearchers inputIndexSearchers,
|
|
||||||
LocalQueryParams localQueryParams,
|
LocalQueryParams localQueryParams,
|
||||||
Analyzer analyzer,
|
Analyzer analyzer,
|
||||||
Similarity similarity,
|
Similarity similarity,
|
||||||
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux) {
|
Multimap<String, String> mltDocumentFieldsMultimap) throws IOException {
|
||||||
var indexSearchers = inputIndexSearchers.shards();
|
List<IndexSearcher> indexSearchers = inputIndexSearchers.shards();
|
||||||
Query luceneAdditionalQuery;
|
Query luceneAdditionalQuery = localQueryParams.query();
|
||||||
try {
|
// Create the mutable version of the input
|
||||||
luceneAdditionalQuery = localQueryParams.query();
|
Map<String, Collection<String>> mltDocumentFields = HashMultimap.create(mltDocumentFieldsMultimap).asMap();
|
||||||
} catch (Exception e) {
|
|
||||||
return Mono.error(e);
|
|
||||||
}
|
|
||||||
return mltDocumentFieldsFlux
|
|
||||||
.collectMap(Tuple2::getT1, Tuple2::getT2, HashMap::new)
|
|
||||||
.flatMap(mltDocumentFields -> Mono.fromCallable(() -> {
|
|
||||||
mltDocumentFields.entrySet().removeIf(entry -> entry.getValue().isEmpty());
|
mltDocumentFields.entrySet().removeIf(entry -> entry.getValue().isEmpty());
|
||||||
if (mltDocumentFields.isEmpty()) {
|
if (mltDocumentFields.isEmpty()) {
|
||||||
return new LocalQueryParams(new MatchNoDocsQuery(),
|
return new MatchNoDocsQuery();
|
||||||
localQueryParams.offsetLong(),
|
|
||||||
localQueryParams.limitLong(),
|
|
||||||
DEFAULT_PAGE_LIMITS,
|
|
||||||
localQueryParams.minCompetitiveScore(),
|
|
||||||
localQueryParams.sort(),
|
|
||||||
localQueryParams.computePreciseHitsCount(),
|
|
||||||
localQueryParams.timeout()
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
MultiMoreLikeThis mlt;
|
MultiMoreLikeThis mlt;
|
||||||
if (indexSearchers.size() == 1) {
|
if (indexSearchers.size() == 1) {
|
||||||
mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexSearchers.get(0).getIndexReader(), IndexReader[]::new), null);
|
mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexSearchers.get(0).getIndexReader(), IndexReader[]::new),
|
||||||
|
null
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
IndexReader[] indexReaders = new IndexReader[indexSearchers.size()];
|
IndexReader[] indexReaders = new IndexReader[indexSearchers.size()];
|
||||||
for (int i = 0, size = indexSearchers.size(); i < size; i++) {
|
for (int i = 0, size = indexSearchers.size(); i < size; i++) {
|
||||||
|
@ -522,8 +512,7 @@ public class LuceneUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the reference docId and apply it to MoreLikeThis, to generate the query
|
// Get the reference docId and apply it to MoreLikeThis, to generate the query
|
||||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
Query mltQuery = mlt.like(mltDocumentFields);
|
||||||
var mltQuery = mlt.like((Map) mltDocumentFields);
|
|
||||||
Query luceneQuery;
|
Query luceneQuery;
|
||||||
if (!(luceneAdditionalQuery instanceof MatchAllDocsQuery)) {
|
if (!(luceneAdditionalQuery instanceof MatchAllDocsQuery)) {
|
||||||
luceneQuery = new Builder()
|
luceneQuery = new Builder()
|
||||||
|
@ -533,17 +522,7 @@ public class LuceneUtils {
|
||||||
} else {
|
} else {
|
||||||
luceneQuery = mltQuery;
|
luceneQuery = mltQuery;
|
||||||
}
|
}
|
||||||
|
return luceneQuery;
|
||||||
return new LocalQueryParams(luceneQuery,
|
|
||||||
localQueryParams.offsetLong(),
|
|
||||||
localQueryParams.limitLong(),
|
|
||||||
DEFAULT_PAGE_LIMITS,
|
|
||||||
localQueryParams.minCompetitiveScore(),
|
|
||||||
localQueryParams.sort(),
|
|
||||||
localQueryParams.computePreciseHitsCount(),
|
|
||||||
localQueryParams.timeout());
|
|
||||||
}).subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic())))
|
|
||||||
.publishOn(Schedulers.parallel());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Collector withTimeout(Collector collector, Duration timeout) {
|
public static Collector withTimeout(Collector collector, Duration timeout) {
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
package it.cavallium.dbengine.lucene.mlt;
|
||||||
|
|
||||||
|
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
|
||||||
|
|
||||||
|
import com.google.common.collect.Multimap;
|
||||||
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer;
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
||||||
|
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import reactor.core.publisher.Mono;
|
||||||
|
import reactor.core.scheduler.Schedulers;
|
||||||
|
|
||||||
|
/**
 * {@link LLSearchTransformer} that replaces the query of the incoming search parameters with a
 * "more like this" query built from a fixed set of reference document fields.
 *
 * <p>The query itself is produced by {@code LuceneUtils.getMoreLikeThisQuery}; this class only
 * supplies the inputs to that call and repackages its result into a new {@link LocalQueryParams}.
 */
public class MoreLikeThisTransformer implements LLSearchTransformer {
|
||||||
|
|
||||||
|
// Field name -> field values of the reference document; passed unchanged to the query builder.
private final Multimap<String, String> mltDocumentFields;
|
||||||
|
// Analyzer forwarded to LuceneUtils.getMoreLikeThisQuery.
private final PerFieldAnalyzerWrapper luceneAnalyzer;
|
||||||
|
// Similarity forwarded to LuceneUtils.getMoreLikeThisQuery.
private final Similarity luceneSimilarity;
|
||||||
|
|
||||||
|
/**
 * Stores the three arguments as-is; no copying or validation is performed here.
 *
 * @param mltDocumentFields field name to field values of the reference document
 * @param luceneAnalyzer analyzer handed to the query builder
 * @param luceneSimilarity similarity handed to the query builder
 */
public MoreLikeThisTransformer(Multimap<String, String> mltDocumentFields,
|
||||||
|
PerFieldAnalyzerWrapper luceneAnalyzer,
|
||||||
|
Similarity luceneSimilarity) {
|
||||||
|
this.mltDocumentFields = mltDocumentFields;
|
||||||
|
this.luceneAnalyzer = luceneAnalyzer;
|
||||||
|
this.luceneSimilarity = luceneSimilarity;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Rewrites the query carried by the transformer input.
 *
 * @param inputMono source of the index searchers and of the query parameters to rewrite
 * @return a mono emitting query parameters whose query is the one returned by
 *     {@code LuceneUtils.getMoreLikeThisQuery} and whose other values are copied from the input;
 *     an {@link IOException} raised while building the query is emitted as an error signal
 */
@Override
|
||||||
|
public Mono<LocalQueryParams> transform(Mono<TransformerInput> inputMono) {
|
||||||
|
// The handler below runs on the uninterruptible wrapper around the bounded-elastic scheduler.
return inputMono.publishOn(uninterruptibleScheduler(Schedulers.boundedElastic())).handle((input, sink) -> {
|
||||||
|
try {
|
||||||
|
var rewrittenQuery = LuceneUtils.getMoreLikeThisQuery(input.indexSearchers(),
|
||||||
|
input.queryParams(),
|
||||||
|
luceneAnalyzer,
|
||||||
|
luceneSimilarity,
|
||||||
|
mltDocumentFields
|
||||||
|
);
|
||||||
|
var queryParams = input.queryParams();
|
||||||
|
// Only the query changes: offset, limit, page limits, minimum competitive score, sort,
// precise-hits-count flag and timeout are taken from the original parameters.
sink.next(new LocalQueryParams(rewrittenQuery,
|
||||||
|
queryParams.offsetLong(),
|
||||||
|
||||||
|
queryParams.limitLong(),
|
||||||
|
queryParams.pageLimits(),
|
||||||
|
queryParams.minCompetitiveScore(),
|
||||||
|
queryParams.sort(),
|
||||||
|
queryParams.computePreciseHitsCount(),
|
||||||
|
queryParams.timeout()
|
||||||
|
));
|
||||||
|
} catch (IOException ex) {
|
||||||
|
// Forward the checked exception through the reactive pipeline instead of throwing it.
sink.error(ex);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
|
@ -564,7 +564,7 @@ public final class MultiMoreLikeThis {
|
||||||
* @param filteredDocument Document with field values extracted for selected fields.
|
* @param filteredDocument Document with field values extracted for selected fields.
|
||||||
* @return More Like This query for the passed document.
|
* @return More Like This query for the passed document.
|
||||||
*/
|
*/
|
||||||
public Query like(Map<String, Collection<Object>> filteredDocument) throws IOException {
|
public Query like(Map<String, ? extends Collection<?>> filteredDocument) throws IOException {
|
||||||
if (fieldNames == null) {
|
if (fieldNames == null) {
|
||||||
// gather list of valid fields from lucene
|
// gather list of valid fields from lucene
|
||||||
Collection<String> fields = BigCompositeReader.getIndexedFields(ir);
|
Collection<String> fields = BigCompositeReader.getIndexedFields(ir);
|
||||||
|
@ -743,11 +743,11 @@ public final class MultiMoreLikeThis {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private PriorityQueue<ScoreTerm> retrieveTerms(Map<String, Collection<Object>> field2fieldValues)
|
private PriorityQueue<ScoreTerm> retrieveTerms(Map<String, ? extends Collection<?>> field2fieldValues)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Map<String, Map<String, Long>> field2termFreqMap = new HashMap<>();
|
Map<String, Map<String, Long>> field2termFreqMap = new HashMap<>();
|
||||||
for (String fieldName : fieldNames) {
|
for (String fieldName : fieldNames) {
|
||||||
Collection<Object> fieldValues = field2fieldValues.get(fieldName);
|
Collection<?> fieldValues = field2fieldValues.get(fieldName);
|
||||||
if (fieldValues == null) {
|
if (fieldValues == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,19 +43,8 @@ public class UnsortedUnscoredSimpleMultiSearcher implements MultiSearcher {
|
||||||
|
|
||||||
return queryParamsMono.flatMap(queryParams2 -> {
|
return queryParamsMono.flatMap(queryParams2 -> {
|
||||||
var localQueryParams = getLocalQueryParams(queryParams2);
|
var localQueryParams = getLocalQueryParams(queryParams2);
|
||||||
return Mono
|
return Flux
|
||||||
.fromRunnable(() -> {
|
.fromIterable(indexSearchers.shards())
|
||||||
LLUtils.ensureBlocking();
|
|
||||||
if (queryParams2.isSorted() && queryParams2.limitLong() > 0) {
|
|
||||||
throw new UnsupportedOperationException("Sorted queries are not supported"
|
|
||||||
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
|
||||||
}
|
|
||||||
if (queryParams2.needsScores() && queryParams2.limitLong() > 0) {
|
|
||||||
throw new UnsupportedOperationException("Scored queries are not supported"
|
|
||||||
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.thenMany(Flux.fromIterable(indexSearchers.shards()))
|
|
||||||
.flatMap(searcher -> {
|
.flatMap(searcher -> {
|
||||||
var llSearcher = Mono.fromCallable(() -> new LLIndexSearcher(searcher, false, null).send());
|
var llSearcher = Mono.fromCallable(() -> new LLIndexSearcher(searcher, false, null).send());
|
||||||
return localSearcher.collect(llSearcher, localQueryParams, keyFieldName, transformer);
|
return localSearcher.collect(llSearcher, localQueryParams, keyFieldName, transformer);
|
||||||
|
@ -85,6 +74,17 @@ public class UnsortedUnscoredSimpleMultiSearcher implements MultiSearcher {
|
||||||
}
|
}
|
||||||
indexSearchers.close();
|
indexSearchers.close();
|
||||||
});
|
});
|
||||||
|
})
|
||||||
|
.doFirst(() -> {
|
||||||
|
LLUtils.ensureBlocking();
|
||||||
|
if (queryParams2.isSorted() && queryParams2.limitLong() > 0) {
|
||||||
|
throw new UnsupportedOperationException("Sorted queries are not supported"
|
||||||
|
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
||||||
|
}
|
||||||
|
if (queryParams2.needsScores() && queryParams2.limitLong() > 0) {
|
||||||
|
throw new UnsupportedOperationException("Scored queries are not supported"
|
||||||
|
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user