Implement MoreLikeThis transformer
This commit is contained in:
parent
81d8abe72c
commit
4d5f8b5b37
|
@ -486,11 +486,12 @@ public class LuceneUtils {
|
|||
}
|
||||
|
||||
public static Mono<LocalQueryParams> getMoreLikeThisQuery(
|
||||
List<LLIndexSearcher> indexSearchers,
|
||||
LLIndexSearchers inputIndexSearchers,
|
||||
LocalQueryParams localQueryParams,
|
||||
Analyzer analyzer,
|
||||
Similarity similarity,
|
||||
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux) {
|
||||
var indexSearchers = inputIndexSearchers.shards();
|
||||
Query luceneAdditionalQuery;
|
||||
try {
|
||||
luceneAdditionalQuery = localQueryParams.query();
|
||||
|
|
|
@ -50,7 +50,7 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
* Generate "more like this" similarity queries. Based on this mail:
|
||||
*
|
||||
* <pre><code>
|
||||
* Lucene does let you access the document frequency of terms, with BigCompositeReader.docFreq().
|
||||
* Lucene does let you access the document frequency of terms, with IndexReader.docFreq().
|
||||
* Term frequencies can be computed by re-tokenizing the text, which, for a single document,
|
||||
* is usually fast enough. But looking up the docFreq() of every term in the document is
|
||||
* probably too slow.
|
||||
|
@ -84,7 +84,7 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
* usage is as follows. The bold fragment is specific to this class. <br>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* BigCompositeReader ir = ...
|
||||
* IndexReader ir = ...
|
||||
* IndexSearcher is = ...
|
||||
*
|
||||
* MoreLikeThis mlt = new MoreLikeThis(ir);
|
||||
|
@ -264,7 +264,7 @@ public final class MultiMoreLikeThis {
|
|||
/** For idf() calculations. */
|
||||
private TFIDFSimilarity similarity; // = new DefaultSimilarity();
|
||||
|
||||
/** BigCompositeReader to use */
|
||||
/** Reader to use; this class operates on a BigCompositeReader rather than a plain IndexReader */
|
||||
private final BigCompositeReader<?> ir;
|
||||
|
||||
/** Boost factor to use when boosting the terms */
|
||||
|
@ -289,7 +289,7 @@ public final class MultiMoreLikeThis {
|
|||
this.boostFactor = boostFactor;
|
||||
}
|
||||
|
||||
/** Constructor requiring a BigCompositeReader. */
|
||||
/** Constructor requiring a BigCompositeReader; a ClassicSimilarity is used as the similarity. */
|
||||
public MultiMoreLikeThis(BigCompositeReader<?> ir) {
|
||||
this(ir, new ClassicSimilarity());
|
||||
}
|
||||
|
@ -401,7 +401,7 @@ public final class MultiMoreLikeThis {
|
|||
* be still considered relevant.
|
||||
*/
|
||||
public void setMaxDocFreqPct(long maxPercentage) {
|
||||
setMaxDocFreq((maxPercentage) * ir.maxDoc() / 100L);
|
||||
setMaxDocFreq(maxPercentage * ir.maxDoc() / 100L);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -437,7 +437,7 @@ public final class MultiMoreLikeThis {
|
|||
|
||||
/**
|
||||
* Sets the field names that will be used when generating the 'More Like This' query. Set this to
|
||||
* null for the field names to be determined at runtime from the BigCompositeReader provided in the
|
||||
* null for the field names to be determined at runtime from the BigCompositeReader provided in the
|
||||
* constructor.
|
||||
*
|
||||
* @param fieldNames the field names that will be used when generating the 'More Like This' query.
|
||||
|
|
|
@ -3,6 +3,7 @@ package it.cavallium.dbengine.lucene.searcher;
|
|||
import io.net5.buffer.api.Send;
|
||||
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
public class AdaptiveLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
|
|
|
@ -28,7 +28,8 @@ public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
|
|||
} else if (queryParams.isSorted() || queryParams.isScored()) {
|
||||
return scoredSimpleLuceneShardSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
|
||||
} else {
|
||||
if (((long) queryParams.offset() + (long) queryParams.limit()) <= (long) queryParams.pageLimits().getPageLimit(0)) {
|
||||
if (((long) queryParams.offset() + (long) queryParams.limit()) <= (long) queryParams.pageLimits().getPageLimit(0)
|
||||
|| transformer != null) {
|
||||
// Run single-page searches using the paged multi searcher
|
||||
return unsortedUnscoredPagedLuceneMultiSearcher.collectMulti(indexSearchersMono, queryParams, keyFieldName, transformer);
|
||||
} else {
|
||||
|
|
|
@ -4,6 +4,9 @@ import io.net5.buffer.api.Send;
|
|||
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
|
||||
import it.cavallium.dbengine.database.LLUtils;
|
||||
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.TransformerInput;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
import reactor.core.scheduler.Schedulers;
|
||||
|
@ -18,12 +21,18 @@ public class CountLuceneLocalSearcher implements LuceneLocalSearcher {
|
|||
return Mono
|
||||
.usingWhen(
|
||||
indexSearcherMono,
|
||||
indexSearcher -> Mono.fromCallable(() -> {
|
||||
try (var is = indexSearcher.receive()) {
|
||||
LLUtils.ensureBlocking();
|
||||
return is.getIndexSearcher().count(queryParams.query());
|
||||
}
|
||||
}).subscribeOn(Schedulers.boundedElastic()),
|
||||
indexSearcher -> {
|
||||
var queryParamsMono = transformer
|
||||
.transform(Mono.fromSupplier(() -> new TransformerInput(LLIndexSearchers.unsharded(indexSearcher),
|
||||
queryParams)));
|
||||
|
||||
return queryParamsMono.flatMap(queryParams2 -> Mono.fromCallable(() -> {
|
||||
try (var is = indexSearcher.receive()) {
|
||||
LLUtils.ensureBlocking();
|
||||
return is.getIndexSearcher().count(queryParams2.query());
|
||||
}
|
||||
}).subscribeOn(Schedulers.boundedElastic()));
|
||||
},
|
||||
is -> Mono.empty()
|
||||
)
|
||||
.map(count -> new LuceneSearchResult(TotalHitsCount.of(count, true), Flux.empty(), null).send())
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
@ -10,7 +11,7 @@ public interface LLSearchTransformer {
|
|||
LLSearchTransformer NO_TRANSFORMATION = queryParamsMono -> queryParamsMono
|
||||
.map(TransformerInput::queryParams);
|
||||
|
||||
record TransformerInput(List<LLIndexSearcher> indexSearchers,
|
||||
record TransformerInput(LLIndexSearchers indexSearchers,
|
||||
LocalQueryParams queryParams) {}
|
||||
|
||||
Mono<LocalQueryParams> transform(Mono<TransformerInput> inputMono);
|
||||
|
|
|
@ -11,6 +11,7 @@ import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
|||
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers.UnshardedIndexSearchers;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.TransformerInput;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -37,18 +38,24 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
|||
|
||||
var indexSearchersMono = indexSearcherMono.map(LLIndexSearchers::unsharded);
|
||||
|
||||
return LLUtils.usingResource(indexSearchersMono, indexSearchers -> this
|
||||
// Search first page results
|
||||
.searchFirstPage(indexSearchers.shards(), queryParams, paginationInfo)
|
||||
// Compute the results of the first page
|
||||
.transform(firstPageTopDocsMono -> this.computeFirstPageResults(firstPageTopDocsMono, indexSearchers.shards(),
|
||||
keyFieldName, queryParams))
|
||||
// Compute other results
|
||||
.transform(firstResult -> this.computeOtherResults(firstResult, indexSearchers.shards(), queryParams,
|
||||
keyFieldName, indexSearchers::close))
|
||||
// Ensure that one LuceneSearchResult is always returned
|
||||
.single(),
|
||||
false);
|
||||
return LLUtils.usingResource(indexSearchersMono, indexSearchers -> {
|
||||
var queryParamsMono = transformer
|
||||
.transform(Mono.fromSupplier(() -> new TransformerInput(indexSearchers, queryParams)));
|
||||
|
||||
return queryParamsMono.flatMap(queryParams2 -> this
|
||||
// Search first page results
|
||||
.searchFirstPage(indexSearchers.shards(), queryParams2, paginationInfo)
|
||||
// Compute the results of the first page
|
||||
.transform(firstPageTopDocsMono -> this.computeFirstPageResults(firstPageTopDocsMono, indexSearchers.shards(),
|
||||
keyFieldName, queryParams2))
|
||||
// Compute other results
|
||||
.transform(firstResult -> this.computeOtherResults(firstResult, indexSearchers.shards(), queryParams2,
|
||||
keyFieldName, indexSearchers::close))
|
||||
// Ensure that one LuceneSearchResult is always returned
|
||||
.single()
|
||||
);
|
||||
},
|
||||
false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -7,6 +7,7 @@ import it.cavallium.dbengine.database.LLUtils;
|
|||
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
||||
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer.TransformerInput;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import reactor.core.publisher.Flux;
|
||||
|
@ -25,55 +26,62 @@ public class SimpleUnsortedUnscoredLuceneMultiSearcher implements LuceneMultiSea
|
|||
LocalQueryParams queryParams,
|
||||
String keyFieldName,
|
||||
LLSearchTransformer transformer) {
|
||||
var indexSearchersSendResource = Mono
|
||||
.fromRunnable(() -> {
|
||||
LLUtils.ensureBlocking();
|
||||
if (queryParams.isSorted() && queryParams.limit() > 0) {
|
||||
throw new UnsupportedOperationException("Sorted queries are not supported"
|
||||
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
||||
}
|
||||
if (queryParams.isScored() && queryParams.limit() > 0) {
|
||||
throw new UnsupportedOperationException("Scored queries are not supported"
|
||||
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
||||
}
|
||||
})
|
||||
.then(indexSearchersMono);
|
||||
var localQueryParams = getLocalQueryParams(queryParams);
|
||||
|
||||
return LLUtils.usingSendResource(indexSearchersSendResource,
|
||||
indexSearchers -> Flux
|
||||
.fromIterable(indexSearchers.shards())
|
||||
.flatMap(searcher -> {
|
||||
var llSearcher = Mono.fromCallable(() -> new LLIndexSearcher(searcher, false, null).send());
|
||||
return localSearcher.collect(llSearcher, localQueryParams, keyFieldName, transformer);
|
||||
})
|
||||
.collectList()
|
||||
.map(results -> {
|
||||
List<LuceneSearchResult> resultsToDrop = new ArrayList<>(results.size());
|
||||
List<Flux<LLKeyScore>> resultsFluxes = new ArrayList<>(results.size());
|
||||
boolean exactTotalHitsCount = true;
|
||||
long totalHitsCountValue = 0;
|
||||
for (Send<LuceneSearchResult> resultToReceive : results) {
|
||||
LuceneSearchResult result = resultToReceive.receive();
|
||||
resultsToDrop.add(result);
|
||||
resultsFluxes.add(result.results());
|
||||
exactTotalHitsCount &= result.totalHitsCount().exact();
|
||||
totalHitsCountValue += result.totalHitsCount().value();
|
||||
return LLUtils.usingSendResource(indexSearchersMono,
|
||||
indexSearchers -> {
|
||||
var queryParamsMono = transformer
|
||||
.transform(Mono.fromSupplier(() -> new TransformerInput(indexSearchers,
|
||||
queryParams)));
|
||||
|
||||
return queryParamsMono.flatMap(queryParams2 -> {
|
||||
var localQueryParams = getLocalQueryParams(queryParams2);
|
||||
return Mono
|
||||
.fromRunnable(() -> {
|
||||
LLUtils.ensureBlocking();
|
||||
if (queryParams2.isSorted() && queryParams2.limit() > 0) {
|
||||
throw new UnsupportedOperationException("Sorted queries are not supported"
|
||||
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
||||
}
|
||||
if (queryParams2.isScored() && queryParams2.limit() > 0) {
|
||||
throw new UnsupportedOperationException("Scored queries are not supported"
|
||||
+ " by SimpleUnsortedUnscoredLuceneMultiSearcher");
|
||||
}
|
||||
})
|
||||
.thenMany(Flux.fromIterable(indexSearchers.shards()))
|
||||
.flatMap(searcher -> {
|
||||
var llSearcher = Mono.fromCallable(() -> new LLIndexSearcher(searcher, false, null).send());
|
||||
return localSearcher.collect(llSearcher, localQueryParams, keyFieldName, transformer);
|
||||
})
|
||||
.collectList()
|
||||
.map(results -> {
|
||||
List<LuceneSearchResult> resultsToDrop = new ArrayList<>(results.size());
|
||||
List<Flux<LLKeyScore>> resultsFluxes = new ArrayList<>(results.size());
|
||||
boolean exactTotalHitsCount = true;
|
||||
long totalHitsCountValue = 0;
|
||||
for (Send<LuceneSearchResult> resultToReceive : results) {
|
||||
LuceneSearchResult result = resultToReceive.receive();
|
||||
resultsToDrop.add(result);
|
||||
resultsFluxes.add(result.results());
|
||||
exactTotalHitsCount &= result.totalHitsCount().exact();
|
||||
totalHitsCountValue += result.totalHitsCount().value();
|
||||
}
|
||||
|
||||
var totalHitsCount = new TotalHitsCount(totalHitsCountValue, exactTotalHitsCount);
|
||||
Flux<LLKeyScore> mergedFluxes = Flux
|
||||
.merge(resultsFluxes)
|
||||
.skip(queryParams2.offset())
|
||||
.take(queryParams2.limit(), true);
|
||||
|
||||
return new LuceneSearchResult(totalHitsCount, mergedFluxes, () -> {
|
||||
for (LuceneSearchResult luceneSearchResult : resultsToDrop) {
|
||||
luceneSearchResult.close();
|
||||
}
|
||||
indexSearchers.close();
|
||||
}).send();
|
||||
});
|
||||
}
|
||||
|
||||
var totalHitsCount = new TotalHitsCount(totalHitsCountValue, exactTotalHitsCount);
|
||||
Flux<LLKeyScore> mergedFluxes = Flux
|
||||
.merge(resultsFluxes)
|
||||
.skip(queryParams.offset())
|
||||
.take(queryParams.limit(), true);
|
||||
|
||||
return new LuceneSearchResult(totalHitsCount, mergedFluxes, () -> {
|
||||
for (LuceneSearchResult luceneSearchResult : resultsToDrop) {
|
||||
luceneSearchResult.close();
|
||||
}
|
||||
indexSearchers.close();
|
||||
}).send();
|
||||
}),
|
||||
);
|
||||
},
|
||||
false
|
||||
);
|
||||
}
|
||||
|
|
|
@ -39,6 +39,10 @@ public class UnsortedUnscoredContinuousLuceneMultiSearcher implements LuceneMult
|
|||
var indexSearchersSendResource = Mono
|
||||
.fromRunnable(() -> {
|
||||
LLUtils.ensureBlocking();
|
||||
if (transformer != null) {
|
||||
throw new UnsupportedOperationException("Transformers are not supported"
|
||||
+ " by UnsortedUnscoredContinuousLuceneMultiSearcher");
|
||||
}
|
||||
if (queryParams.isSorted() && queryParams.limit() > 0) {
|
||||
throw new UnsupportedOperationException("Sorted queries are not supported"
|
||||
+ " by UnsortedUnscoredContinuousLuceneMultiSearcher");
|
||||
|
|
Loading…
Reference in New Issue