Implement MoreLikeThis transformer
This commit is contained in:
parent
81d8abe72c
commit
4d5f8b5b37
|
@ -486,11 +486,12 @@ public class LuceneUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Mono<LocalQueryParams> getMoreLikeThisQuery(
|
public static Mono<LocalQueryParams> getMoreLikeThisQuery(
|
||||||
List<LLIndexSearcher> indexSearchers,
|
LLIndexSearchers inputIndexSearchers,
|
||||||
LocalQueryParams localQueryParams,
|
LocalQueryParams localQueryParams,
|
||||||
Analyzer analyzer,
|
Analyzer analyzer,
|
||||||
Similarity similarity,
|
Similarity similarity,
|
||||||
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux) {
|
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux) {
|
||||||
|
var indexSearchers = inputIndexSearchers.shards();
|
||||||
Query luceneAdditionalQuery;
|
Query luceneAdditionalQuery;
|
||||||
try {
|
try {
|
||||||
luceneAdditionalQuery = localQueryParams.query();
|
luceneAdditionalQuery = localQueryParams.query();
|
||||||
|
|
|
@ -50,7 +50,7 @@ import org.apache.lucene.util.PriorityQueue;
|
||||||
* Generate "more like this" similarity queries. Based on this mail:
|
* Generate "more like this" similarity queries. Based on this mail:
|
||||||
*
|
*
|
||||||
* <pre><code>
|
* <pre><code>
|
||||||
* Lucene does let you access the document frequency of terms, with BigCompositeReader.docFreq().
|
* Lucene does let you access the document frequency of terms, with IndexReader.docFreq().
|
||||||
* Term frequencies can be computed by re-tokenizing the text, which, for a single document,
|
* Term frequencies can be computed by re-tokenizing the text, which, for a single document,
|
||||||
* is usually fast enough. But looking up the docFreq() of every term in the document is
|
* is usually fast enough. But looking up the docFreq() of every term in the document is
|
||||||
* probably too slow.
|
* probably too slow.
|
||||||
|
@ -84,7 +84,7 @@ import org.apache.lucene.util.PriorityQueue;
|
||||||
* usage is as follows. The bold fragment is specific to this class. <br>
|
* usage is as follows. The bold fragment is specific to this class. <br>
|
||||||
*
|
*
|
||||||
* <pre class="prettyprint">
|
* <pre class="prettyprint">
|
||||||
* BigCompositeReader ir = ...
|
* IndexReader ir = ...
|
||||||
* IndexSearcher is = ...
|
* IndexSearcher is = ...
|
||||||
*
|
*
|
||||||
* MoreLikeThis mlt = new MoreLikeThis(ir);
|
* MoreLikeThis mlt = new MoreLikeThis(ir);
|
||||||
|
@ -264,7 +264,7 @@ public final class MultiMoreLikeThis {
|
||||||
/** For idf() calculations. */
|
/** For idf() calculations. */
|
||||||
private TFIDFSimilarity similarity; // = new DefaultSimilarity();
|
private TFIDFSimilarity similarity; // = new DefaultSimilarity();
|
||||||
|
|
||||||
/** BigCompositeReader to use */
|
/** IndexReader to use */
|
||||||
private final BigCompositeReader<?> ir;
|
private final BigCompositeReader<?> ir;
|
||||||
|
|
||||||
/** Boost factor to use when boosting the terms */
|
/** Boost factor to use when boosting the terms */
|
||||||
|
@ -289,7 +289,7 @@ public final class MultiMoreLikeThis {
|
||||||
this.boostFactor = boostFactor;
|
this.boostFactor = boostFactor;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Constructor requiring a BigCompositeReader. */
|
/** Constructor requiring an IndexReader. */
|
||||||
public MultiMoreLikeThis(BigCompositeReader<?> ir) {
|
public MultiMoreLikeThis(BigCompositeReader<?> ir) {
|
||||||
this(ir, new ClassicSimilarity());
|
this(ir, new ClassicSimilarity());
|
||||||
}
|
}
|
||||||
|
@ -401,7 +401,7 @@ public final class MultiMoreLikeThis {
|
||||||
* be still considered relevant.
|
* be still considered relevant.
|
||||||
*/
|
*/
|
||||||
public void setMaxDocFreqPct(long maxPercentage) {
|
public void setMaxDocFreqPct(long maxPercentage) {
|
||||||
setMaxDocFreq((maxPercentage) * ir.maxDoc() / 100L);
|
setMaxDocFreq(maxPercentage * ir.maxDoc() / 100L);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -437,7 +437,7 @@ public final class MultiMoreLikeThis {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the field names that will be used when generating the 'More Like This' query. Set this to
|
* Sets the field names that will be used when generating the 'More Like This' query. Set this to
|
||||||
* null for the field names to be determined at runtime from the BigCompositeReader provided in the
|
* null for the field names to be determined at runtime from the IndexReader provided in the
|
||||||
* constructor.
|
* constructor.
|
||||||
*
|
*
|
||||||
* @param fieldNames the field names that will be used when generating the 'More Like This' query.
|
* @param fieldNames the field names that will be used when generating the 'More Like This' query.
|
||||||
|
|
|
@ -3,6 +3,7 @@ package it.cavallium.dbengine.lucene.searcher;
|
||||||
import io.net5.buffer.api.Send;
|
import io.net5.buffer.api.Send;
|
||||||
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
||||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import reactor.core.publisher.Mono;
|
import reactor.core.publisher.Mono;
|
||||||
|
|
||||||
public class AdaptiveLuceneLocalSearcher implements LuceneLocalSearcher {
|
public class AdaptiveLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||||
|
|
|
@ -28,7 +28,8 @@ public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||||
} else if (queryParams.isSorted() || queryParams.isScored()) {
|
} else if (queryParams.isSorted() || queryParams.isScored()) {
|
||||||
return scoredSimpleLuceneShardSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
|
return scoredSimpleLuceneShardSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
|
||||||
} else {
|
} else {
|
||||||
if (((long) queryParams.offset() + (long) queryParams.limit()) <= (long) queryParams.pageLimits().getPageLimit(0)) {
|
if (((long) queryParams.offset() + (long) queryParams.limit()) <= (long) queryParams.pageLimits().getPageLimit(0)
|
||||||
|
|| transformer != null) {
|
||||||
// Run single-page searches using the paged multi searcher
|
// Run single-page searches using the paged multi searcher
|
||||||
return unsortedUnscoredPagedLuceneMultiSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
|
return unsortedUnscoredPagedLuceneMultiSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -4,6 +4,9 @@ import io.net5.buffer.api.Send;
|
||||||
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
|
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
|
||||||
import it.cavallium.dbengine.database.LLUtils;
|
import it.cavallium.dbengine.database.LLUtils;
|
||||||
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
||||||
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.TransformerInput;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import reactor.core.publisher.Flux;
|
import reactor.core.publisher.Flux;
|
||||||
import reactor.core.publisher.Mono;
|
import reactor.core.publisher.Mono;
|
||||||
import reactor.core.scheduler.Schedulers;
|
import reactor.core.scheduler.Schedulers;
|
||||||
|
@ -18,12 +21,18 @@ public class CountLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||||
return Mono
|
return Mono
|
||||||
.usingWhen(
|
.usingWhen(
|
||||||
indexSearcherMono,
|
indexSearcherMono,
|
||||||
indexSearcher -> Mono.fromCallable(() -> {
|
indexSearcher -> {
|
||||||
try (var is = indexSearcher.receive()) {
|
var queryParamsMono = transformer
|
||||||
LLUtils.ensureBlocking();
|
.transform(Mono.fromSupplier(() -> new TransformerInput(LLIndexSearchers.unsharded(indexSearcher),
|
||||||
return is.getIndexSearcher().count(queryParams.query());
|
queryParams)));
|
||||||
}
|
|
||||||
}).subscribeOn(Schedulers.boundedElastic()),
|
return queryParamsMono.flatMap(queryParams2 -> Mono.fromCallable(() -> {
|
||||||
|
try (var is = indexSearcher.receive()) {
|
||||||
|
LLUtils.ensureBlocking();
|
||||||
|
return is.getIndexSearcher().count(queryParams2.query());
|
||||||
|
}
|
||||||
|
}).subscribeOn(Schedulers.boundedElastic()));
|
||||||
|
},
|
||||||
is -> Mono.empty()
|
is -> Mono.empty()
|
||||||
)
|
)
|
||||||
.map(count -> new LuceneSearchResult(TotalHitsCount.of(count, true), Flux.empty(), null).send())
|
.map(count -> new LuceneSearchResult(TotalHitsCount.of(count, true), Flux.empty(), null).send())
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package it.cavallium.dbengine.lucene.searcher;
|
package it.cavallium.dbengine.lucene.searcher;
|
||||||
|
|
||||||
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
||||||
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import reactor.core.publisher.Mono;
|
import reactor.core.publisher.Mono;
|
||||||
|
@ -10,7 +11,7 @@ public interface LLSearchTransformer {
|
||||||
LLSearchTransformer NO_TRANSFORMATION = queryParamsMono -> queryParamsMono
|
LLSearchTransformer NO_TRANSFORMATION = queryParamsMono -> queryParamsMono
|
||||||
.map(TransformerInput::queryParams);
|
.map(TransformerInput::queryParams);
|
||||||
|
|
||||||
record TransformerInput(List<LLIndexSearcher> indexSearchers,
|
record TransformerInput(LLIndexSearchers indexSearchers,
|
||||||
LocalQueryParams queryParams) {}
|
LocalQueryParams queryParams) {}
|
||||||
|
|
||||||
Mono<LocalQueryParams> transform(Mono<TransformerInput> inputMono);
|
Mono<LocalQueryParams> transform(Mono<TransformerInput> inputMono);
|
||||||
|
|
|
@ -11,6 +11,7 @@ import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
||||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers.UnshardedIndexSearchers;
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers.UnshardedIndexSearchers;
|
||||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.TransformerInput;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -37,18 +38,24 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||||
|
|
||||||
var indexSearchersMono = indexSearcherMono.map(LLIndexSearchers::unsharded);
|
var indexSearchersMono = indexSearcherMono.map(LLIndexSearchers::unsharded);
|
||||||
|
|
||||||
return LLUtils.usingResource(indexSearchersMono, indexSearchers -> this
|
return LLUtils.usingResource(indexSearchersMono, indexSearchers -> {
|
||||||
// Search first page results
|
var queryParamsMono = transformer
|
||||||
.searchFirstPage(indexSearchers.shards(), queryParams, paginationInfo)
|
.transform(Mono.fromSupplier(() -> new TransformerInput(indexSearchers, queryParams)));
|
||||||
// Compute the results of the first page
|
|
||||||
.transform(firstPageTopDocsMono -> this.computeFirstPageResults(firstPageTopDocsMono, indexSearchers.shards(),
|
return queryParamsMono.flatMap(queryParams2 -> this
|
||||||
keyFieldName, queryParams))
|
// Search first page results
|
||||||
// Compute other results
|
.searchFirstPage(indexSearchers.shards(), queryParams2, paginationInfo)
|
||||||
.transform(firstResult -> this.computeOtherResults(firstResult, indexSearchers.shards(), queryParams,
|
// Compute the results of the first page
|
||||||
keyFieldName, indexSearchers::close))
|
.transform(firstPageTopDocsMono -> this.computeFirstPageResults(firstPageTopDocsMono, indexSearchers.shards(),
|
||||||
// Ensure that one LuceneSearchResult is always returned
|
keyFieldName, queryParams2))
|
||||||
.single(),
|
// Compute other results
|
||||||
false);
|
.transform(firstResult -> this.computeOtherResults(firstResult, indexSearchers.shards(), queryParams2,
|
||||||
|
keyFieldName, indexSearchers::close))
|
||||||
|
// Ensure that one LuceneSearchResult is always returned
|
||||||
|
.single()
|
||||||
|
);
|
||||||
|
},
|
||||||
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -7,6 +7,7 @@ import it.cavallium.dbengine.database.LLUtils;
|
||||||
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
||||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||||
|
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.TransformerInput;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import reactor.core.publisher.Flux;
|
import reactor.core.publisher.Flux;
|
||||||
|
@ -25,55 +26,62 @@ public class SimpleUnsortedUnscoredLuceneMultiSearcher implements LuceneMultiSea
|
||||||
LocalQueryParams queryParams,
|
LocalQueryParams queryParams,
|
||||||
String keyFieldName,
|
String keyFieldName,
|
||||||
LLSearchTransformer transformer) {
|
LLSearchTransformer transformer) {
|
||||||
var indexSearchersSendResource = Mono
|
|
||||||
.fromRunnable(() -> {
|
|
||||||
LLUtils.ensureBlocking();
|
|
||||||
if (queryParams.isSorted() && queryParams.limit() > 0) {
|
|
||||||
throw new UnsupportedOperationException("Sorted queries are not supported"
|
|
||||||
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
|
||||||
}
|
|
||||||
if (queryParams.isScored() && queryParams.limit() > 0) {
|
|
||||||
throw new UnsupportedOperationException("Scored queries are not supported"
|
|
||||||
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.then(indexSearchersMono);
|
|
||||||
var localQueryParams = getLocalQueryParams(queryParams);
|
|
||||||
|
|
||||||
return LLUtils.usingSendResource(indexSearchersSendResource,
|
return LLUtils.usingSendResource(indexSearchersMono,
|
||||||
indexSearchers -> Flux
|
indexSearchers -> {
|
||||||
.fromIterable(indexSearchers.shards())
|
var queryParamsMono = transformer
|
||||||
.flatMap(searcher -> {
|
.transform(Mono.fromSupplier(() -> new TransformerInput(indexSearchers,
|
||||||
var llSearcher = Mono.fromCallable(() -> new LLIndexSearcher(searcher, false, null).send());
|
queryParams)));
|
||||||
return localSearcher.collect(llSearcher, localQueryParams, keyFieldName, transformer);
|
|
||||||
})
|
return queryParamsMono.flatMap(queryParams2 -> {
|
||||||
.collectList()
|
var localQueryParams = getLocalQueryParams(queryParams2);
|
||||||
.map(results -> {
|
return Mono
|
||||||
List<LuceneSearchResult> resultsToDrop = new ArrayList<>(results.size());
|
.fromRunnable(() -> {
|
||||||
List<Flux<LLKeyScore>> resultsFluxes = new ArrayList<>(results.size());
|
LLUtils.ensureBlocking();
|
||||||
boolean exactTotalHitsCount = true;
|
if (queryParams2.isSorted() && queryParams2.limit() > 0) {
|
||||||
long totalHitsCountValue = 0;
|
throw new UnsupportedOperationException("Sorted queries are not supported"
|
||||||
for (Send<LuceneSearchResult> resultToReceive : results) {
|
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
||||||
LuceneSearchResult result = resultToReceive.receive();
|
}
|
||||||
resultsToDrop.add(result);
|
if (queryParams2.isScored() && queryParams2.limit() > 0) {
|
||||||
resultsFluxes.add(result.results());
|
throw new UnsupportedOperationException("Scored queries are not supported"
|
||||||
exactTotalHitsCount &= result.totalHitsCount().exact();
|
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
||||||
totalHitsCountValue += result.totalHitsCount().value();
|
}
|
||||||
|
})
|
||||||
|
.thenMany(Flux.fromIterable(indexSearchers.shards()))
|
||||||
|
.flatMap(searcher -> {
|
||||||
|
var llSearcher = Mono.fromCallable(() -> new LLIndexSearcher(searcher, false, null).send());
|
||||||
|
return localSearcher.collect(llSearcher, localQueryParams, keyFieldName, transformer);
|
||||||
|
})
|
||||||
|
.collectList()
|
||||||
|
.map(results -> {
|
||||||
|
List<LuceneSearchResult> resultsToDrop = new ArrayList<>(results.size());
|
||||||
|
List<Flux<LLKeyScore>> resultsFluxes = new ArrayList<>(results.size());
|
||||||
|
boolean exactTotalHitsCount = true;
|
||||||
|
long totalHitsCountValue = 0;
|
||||||
|
for (Send<LuceneSearchResult> resultToReceive : results) {
|
||||||
|
LuceneSearchResult result = resultToReceive.receive();
|
||||||
|
resultsToDrop.add(result);
|
||||||
|
resultsFluxes.add(result.results());
|
||||||
|
exactTotalHitsCount &= result.totalHitsCount().exact();
|
||||||
|
totalHitsCountValue += result.totalHitsCount().value();
|
||||||
|
}
|
||||||
|
|
||||||
|
var totalHitsCount = new TotalHitsCount(totalHitsCountValue, exactTotalHitsCount);
|
||||||
|
Flux<LLKeyScore> mergedFluxes = Flux
|
||||||
|
.merge(resultsFluxes)
|
||||||
|
.skip(queryParams2.offset())
|
||||||
|
.take(queryParams2.limit(), true);
|
||||||
|
|
||||||
|
return new LuceneSearchResult(totalHitsCount, mergedFluxes, () -> {
|
||||||
|
for (LuceneSearchResult luceneSearchResult : resultsToDrop) {
|
||||||
|
luceneSearchResult.close();
|
||||||
|
}
|
||||||
|
indexSearchers.close();
|
||||||
|
}).send();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
);
|
||||||
var totalHitsCount = new TotalHitsCount(totalHitsCountValue, exactTotalHitsCount);
|
},
|
||||||
Flux<LLKeyScore> mergedFluxes = Flux
|
|
||||||
.merge(resultsFluxes)
|
|
||||||
.skip(queryParams.offset())
|
|
||||||
.take(queryParams.limit(), true);
|
|
||||||
|
|
||||||
return new LuceneSearchResult(totalHitsCount, mergedFluxes, () -> {
|
|
||||||
for (LuceneSearchResult luceneSearchResult : resultsToDrop) {
|
|
||||||
luceneSearchResult.close();
|
|
||||||
}
|
|
||||||
indexSearchers.close();
|
|
||||||
}).send();
|
|
||||||
}),
|
|
||||||
false
|
false
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,10 @@ public class UnsortedUnscoredContinuousLuceneMultiSearcher implements LuceneMult
|
||||||
var indexSearchersSendResource = Mono
|
var indexSearchersSendResource = Mono
|
||||||
.fromRunnable(() -> {
|
.fromRunnable(() -> {
|
||||||
LLUtils.ensureBlocking();
|
LLUtils.ensureBlocking();
|
||||||
|
if (transformer != null) {
|
||||||
|
throw new UnsupportedOperationException("Transformers are not supported"
|
||||||
|
+ " by UnsortedUnscoredContinuousLuceneMultiSearcher");
|
||||||
|
}
|
||||||
if (queryParams.isSorted() && queryParams.limit() > 0) {
|
if (queryParams.isSorted() && queryParams.limit() > 0) {
|
||||||
throw new UnsupportedOperationException("Sorted queries are not supported"
|
throw new UnsupportedOperationException("Sorted queries are not supported"
|
||||||
+ " by UnsortedUnscoredContinuousLuceneMultiSearcher");
|
+ " by UnsortedUnscoredContinuousLuceneMultiSearcher");
|
||||||
|
|
Loading…
Reference in New Issue
Block a user