Implement offsets in lucene search

This commit is contained in:
Andrea Cavalli 2021-04-01 19:48:25 +02:00
parent 3e6573d955
commit 918ff71091
16 changed files with 394 additions and 200 deletions

View File

@ -175,6 +175,7 @@ versions:
QueryParams: QueryParams:
data: data:
query: Query query: Query
offset: long
limit: long limit: long
minCompetitiveScore: -float minCompetitiveScore: -float
sort: Sort sort: Sort

View File

@ -2,6 +2,7 @@ package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.ClientQueryParams; import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.current.data.Query; import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLLuceneIndex; import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLScoreMode; import it.cavallium.dbengine.database.LLScoreMode;
import it.cavallium.dbengine.database.LLSearchResult; import it.cavallium.dbengine.database.LLSearchResult;
@ -78,9 +79,26 @@ public class LuceneIndex<T, U> implements LLSnapshottable {
return luceneIndex.deleteAll(); return luceneIndex.deleteAll();
} }
private static QueryParams fixOffset(LLLuceneIndex luceneIndex, QueryParams queryParams) {
if (luceneIndex.supportsOffset()) {
return queryParams;
} else {
return queryParams.setOffset(0);
}
}
private static long fixTransformOffset(LLLuceneIndex luceneIndex, long offset) {
if (luceneIndex.supportsOffset()) {
return 0;
} else {
return offset;
}
}
private Mono<SearchResultKeys<T>> transformLuceneResult(LLSearchResult llSearchResult, private Mono<SearchResultKeys<T>> transformLuceneResult(LLSearchResult llSearchResult,
@Nullable MultiSort<SearchResultKey<T>> sort, @Nullable MultiSort<SearchResultKey<T>> sort,
LLScoreMode scoreMode, LLScoreMode scoreMode,
long offset,
@Nullable Long limit) { @Nullable Long limit) {
Flux<SearchResultKeys<T>> mappedKeys = llSearchResult Flux<SearchResultKeys<T>> mappedKeys = llSearchResult
.getResults() .getResults()
@ -104,12 +122,13 @@ public class LuceneIndex<T, U> implements LLSnapshottable {
} else { } else {
mappedSort = null; mappedSort = null;
} }
return LuceneUtils.mergeSignalStreamKeys(mappedKeys, mappedSort, limit); return LuceneUtils.mergeSignalStreamKeys(mappedKeys, mappedSort, offset, limit);
} }
private Mono<SearchResult<T, U>> transformLuceneResultWithValues(LLSearchResult llSearchResult, private Mono<SearchResult<T, U>> transformLuceneResultWithValues(LLSearchResult llSearchResult,
@Nullable MultiSort<SearchResultItem<T, U>> sort, @Nullable MultiSort<SearchResultItem<T, U>> sort,
LLScoreMode scoreMode, LLScoreMode scoreMode,
long offset,
@Nullable Long limit, @Nullable Long limit,
ValueGetter<T, U> valueGetter) { ValueGetter<T, U> valueGetter) {
Flux<SearchResult<T, U>> mappedKeys = llSearchResult Flux<SearchResult<T, U>> mappedKeys = llSearchResult
@ -135,7 +154,7 @@ public class LuceneIndex<T, U> implements LLSnapshottable {
} else { } else {
mappedSort = null; mappedSort = null;
} }
return LuceneUtils.mergeSignalStreamItems(mappedKeys, mappedSort, limit); return LuceneUtils.mergeSignalStreamItems(mappedKeys, mappedSort, offset, limit);
} }
/** /**
@ -152,15 +171,17 @@ public class LuceneIndex<T, U> implements LLSnapshottable {
Flux<Tuple2<String, Set<String>>> mltDocumentFields Flux<Tuple2<String, Set<String>>> mltDocumentFields
= indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue); = indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue);
return luceneIndex return luceneIndex
.moreLikeThis(resolveSnapshot(queryParams.getSnapshot()), queryParams.toQueryParams(), indicizer.getKeyFieldName(), mltDocumentFields) .moreLikeThis(resolveSnapshot(queryParams.getSnapshot()), fixOffset(luceneIndex, queryParams.toQueryParams()), indicizer.getKeyFieldName(), mltDocumentFields)
.flatMap(llSearchResult -> this.transformLuceneResult(llSearchResult, .flatMap(llSearchResult -> this.transformLuceneResult(llSearchResult,
queryParams.getSort(), queryParams.getSort(),
queryParams.getScoreMode(), queryParams.getScoreMode(),
fixTransformOffset(luceneIndex, queryParams.getOffset()),
queryParams.getLimit() queryParams.getLimit()
)); ));
} }
/** /**
* *
* @param queryParams the limit is valid for each lucene instance. * @param queryParams the limit is valid for each lucene instance.
@ -177,13 +198,14 @@ public class LuceneIndex<T, U> implements LLSnapshottable {
= indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue); = indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue);
return luceneIndex return luceneIndex
.moreLikeThis(resolveSnapshot(queryParams.getSnapshot()), .moreLikeThis(resolveSnapshot(queryParams.getSnapshot()),
queryParams.toQueryParams(), fixOffset(luceneIndex, queryParams.toQueryParams()),
indicizer.getKeyFieldName(), indicizer.getKeyFieldName(),
mltDocumentFields mltDocumentFields
) )
.flatMap(llSearchResult -> this.transformLuceneResultWithValues(llSearchResult, .flatMap(llSearchResult -> this.transformLuceneResultWithValues(llSearchResult,
queryParams.getSort(), queryParams.getSort(),
queryParams.getScoreMode(), queryParams.getScoreMode(),
fixTransformOffset(luceneIndex, queryParams.getOffset()),
queryParams.getLimit(), queryParams.getLimit(),
valueGetter valueGetter
)); ));
@ -199,10 +221,14 @@ public class LuceneIndex<T, U> implements LLSnapshottable {
public Mono<SearchResultKeys<T>> search( public Mono<SearchResultKeys<T>> search(
ClientQueryParams<SearchResultKey<T>> queryParams) { ClientQueryParams<SearchResultKey<T>> queryParams) {
return luceneIndex return luceneIndex
.search(resolveSnapshot(queryParams.getSnapshot()), queryParams.toQueryParams(), indicizer.getKeyFieldName()) .search(resolveSnapshot(queryParams.getSnapshot()),
fixOffset(luceneIndex, queryParams.toQueryParams()),
indicizer.getKeyFieldName()
)
.flatMap(llSearchResult -> this.transformLuceneResult(llSearchResult, .flatMap(llSearchResult -> this.transformLuceneResult(llSearchResult,
queryParams.getSort(), queryParams.getSort(),
queryParams.getScoreMode(), queryParams.getScoreMode(),
fixTransformOffset(luceneIndex, queryParams.getOffset()),
queryParams.getLimit() queryParams.getLimit()
)); ));
} }
@ -218,10 +244,11 @@ public class LuceneIndex<T, U> implements LLSnapshottable {
ClientQueryParams<SearchResultItem<T, U>> queryParams, ClientQueryParams<SearchResultItem<T, U>> queryParams,
ValueGetter<T, U> valueGetter) { ValueGetter<T, U> valueGetter) {
return luceneIndex return luceneIndex
.search(resolveSnapshot(queryParams.getSnapshot()), queryParams.toQueryParams(), indicizer.getKeyFieldName()) .search(resolveSnapshot(queryParams.getSnapshot()), fixOffset(luceneIndex, queryParams.toQueryParams()), indicizer.getKeyFieldName())
.flatMap(llSearchResult -> this.transformLuceneResultWithValues(llSearchResult, .flatMap(llSearchResult -> this.transformLuceneResultWithValues(llSearchResult,
queryParams.getSort(), queryParams.getSort(),
queryParams.getScoreMode(), queryParams.getScoreMode(),
fixTransformOffset(luceneIndex, queryParams.getOffset()),
queryParams.getLimit(), queryParams.getLimit(),
valueGetter valueGetter
)); ));

View File

@ -35,6 +35,9 @@ public final class ClientQueryParams<T> {
@NonNull @NonNull
private Query query; private Query query;
@Default
private long offset = 0;
@Default @Default
private long limit = Long.MAX_VALUE; private long limit = Long.MAX_VALUE;
@ -75,6 +78,7 @@ public final class ClientQueryParams<T> {
.query(getQuery()) .query(getQuery())
.sort(getSort() != null ? getSort().getQuerySort() : NoSort.of()) .sort(getSort() != null ? getSort().getQuerySort() : NoSort.of())
.minCompetitiveScore(Nullablefloat.ofNullable(getMinCompetitiveScore())) .minCompetitiveScore(Nullablefloat.ofNullable(getMinCompetitiveScore()))
.offset(getOffset())
.limit(getLimit()) .limit(getLimit())
.scoreMode(toScoreMode()) .scoreMode(toScoreMode())
.build(); .build();

View File

@ -49,15 +49,17 @@ public interface LLLuceneIndex extends LLSnapshottable {
Mono<LLSearchResult> search(@Nullable LLSnapshot snapshot, QueryParams queryParams, String keyFieldName); Mono<LLSearchResult> search(@Nullable LLSnapshot snapshot, QueryParams queryParams, String keyFieldName);
default Mono<Long> count(@Nullable LLSnapshot snapshot, Query query) { default Mono<Long> count(@Nullable LLSnapshot snapshot, Query query) {
QueryParams params = QueryParams.of(query, 0, Nullablefloat.empty(), NoSort.of(), ScoreMode.of(false, false)); QueryParams params = QueryParams.of(query, 0, 0, Nullablefloat.empty(), NoSort.of(), ScoreMode.of(false, false));
return Mono.from(this.search(snapshot, params, null) return Mono.from(this.search(snapshot, params, null)
.flatMap(results -> LuceneUtils.mergeSignalStreamRaw(results.getResults(), null, null)) .flatMap(results -> LuceneUtils.mergeSignalStreamRaw(results.getResults(), null, 0, null))
.map(LLSearchResultShard::getTotalHitsCount) .map(LLSearchResultShard::getTotalHitsCount)
.defaultIfEmpty(0L)); .defaultIfEmpty(0L));
} }
boolean isLowMemoryMode(); boolean isLowMemoryMode();
boolean supportsOffset();
Mono<Void> close(); Mono<Void> close();
/** /**

View File

@ -19,6 +19,7 @@ import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.searcher.AdaptiveStreamSearcher; import it.cavallium.dbengine.lucene.searcher.AdaptiveStreamSearcher;
import it.cavallium.dbengine.lucene.searcher.AllowOnlyQueryParsingCollectorStreamSearcher; import it.cavallium.dbengine.lucene.searcher.AllowOnlyQueryParsingCollectorStreamSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneSearchInstance;
import it.cavallium.dbengine.lucene.searcher.LuceneStreamSearcher; import it.cavallium.dbengine.lucene.searcher.LuceneStreamSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneStreamSearcher.HandleResult; import it.cavallium.dbengine.lucene.searcher.LuceneStreamSearcher.HandleResult;
import java.io.IOException; import java.io.IOException;
@ -451,6 +452,7 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
.subscribeOn(luceneQueryScheduler) .subscribeOn(luceneQueryScheduler)
.map(luceneQuery -> luceneSearch(doDistributedPre, .map(luceneQuery -> luceneSearch(doDistributedPre,
indexSearcher, indexSearcher,
queryParams.getOffset(),
queryParams.getLimit(), queryParams.getLimit(),
queryParams.getMinCompetitiveScore().getNullable(), queryParams.getMinCompetitiveScore().getNullable(),
keyFieldName, keyFieldName,
@ -510,6 +512,7 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
return luceneSearch(doDistributedPre, return luceneSearch(doDistributedPre,
indexSearcher, indexSearcher,
queryParams.getOffset(),
queryParams.getLimit(), queryParams.getLimit(),
queryParams.getMinCompetitiveScore().getNullable(), queryParams.getMinCompetitiveScore().getNullable(),
keyFieldName, keyFieldName,
@ -534,6 +537,7 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
private LLSearchResult luceneSearch(boolean doDistributedPre, private LLSearchResult luceneSearch(boolean doDistributedPre,
IndexSearcher indexSearcher, IndexSearcher indexSearcher,
long offset,
long limit, long limit,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName,
@ -543,26 +547,29 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
ScoreMode luceneScoreMode) { ScoreMode luceneScoreMode) {
return new LLSearchResult(Mono.<LLSearchResultShard>create(monoSink -> { return new LLSearchResult(Mono.<LLSearchResultShard>create(monoSink -> {
LuceneSearchInstance luceneSearchInstance;
long totalHitsCount; long totalHitsCount;
try { try {
if (!doDistributedPre) { if (doDistributedPre) {
AtomicLong totalHitsCountAtomic = new AtomicLong(0);
//noinspection BlockingMethodInNonBlockingContext //noinspection BlockingMethodInNonBlockingContext
streamSearcher.search(indexSearcher, allowOnlyQueryParsingCollectorStreamSearcher.search(indexSearcher, luceneQuery);
monoSink.success(new LLSearchResultShard(Flux.empty(), 0));
return;
} else {
int boundedOffset = Math.max(0, offset > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) offset);
int boundedLimit = Math.max(0, limit > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) limit);
//noinspection BlockingMethodInNonBlockingContext
luceneSearchInstance = streamSearcher.search(indexSearcher,
luceneQuery, luceneQuery,
0, boundedOffset,
boundedLimit,
luceneSort, luceneSort,
luceneScoreMode, luceneScoreMode,
minCompetitiveScore, minCompetitiveScore,
keyFieldName, keyFieldName
keyScore -> HandleResult.HALT,
totalHitsCountAtomic::set
); );
totalHitsCount = totalHitsCountAtomic.get();
} else {
//noinspection BlockingMethodInNonBlockingContext //noinspection BlockingMethodInNonBlockingContext
allowOnlyQueryParsingCollectorStreamSearcher.search(indexSearcher, luceneQuery); totalHitsCount = luceneSearchInstance.getTotalHitsCount();
totalHitsCount = 0;
} }
} catch (Exception ex) { } catch (Exception ex) {
monoSink.error(ex); monoSink.error(ex);
@ -590,18 +597,8 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
}); });
try { try {
if (!doDistributedPre) {
int boundedLimit = Math.max(0, limit > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) limit);
AtomicLong atomicTotalHitsCount = new AtomicLong(0);
//noinspection BlockingMethodInNonBlockingContext //noinspection BlockingMethodInNonBlockingContext
streamSearcher.search(indexSearcher, luceneSearchInstance.getResults(keyScore -> {
luceneQuery,
boundedLimit,
luceneSort,
luceneScoreMode,
minCompetitiveScore,
keyFieldName,
keyScore -> {
try { try {
if (cancelled.get()) { if (cancelled.get()) {
return HandleResult.HALT; return HandleResult.HALT;
@ -623,10 +620,7 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
requests.release(Integer.MAX_VALUE); requests.release(Integer.MAX_VALUE);
return HandleResult.HALT; return HandleResult.HALT;
} }
}, });
atomicTotalHitsCount::set
);
}
sink.complete(); sink.complete();
} catch (Exception ex) { } catch (Exception ex) {
sink.error(ex); sink.error(ex);
@ -720,4 +714,9 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
public boolean isLowMemoryMode() { public boolean isLowMemoryMode() {
return lowMemory; return lowMemory;
} }
@Override
public boolean supportsOffset() {
return true;
}
} }

View File

@ -212,6 +212,9 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
QueryParams queryParams, QueryParams queryParams,
String keyFieldName, String keyFieldName,
Flux<Tuple2<String, Set<String>>> mltDocumentFields) { Flux<Tuple2<String, Set<String>>> mltDocumentFields) {
if (queryParams.getOffset() != 0) {
return Mono.error(new IllegalArgumentException("MultiLuceneIndex requires an offset equal to 0"));
}
long actionId; long actionId;
int scoreDivisor; int scoreDivisor;
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsShared; Flux<Tuple2<String, Set<String>>> mltDocumentFieldsShared;
@ -284,6 +287,9 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
long actionId; long actionId;
int scoreDivisor; int scoreDivisor;
Mono<Void> distributedPre; Mono<Void> distributedPre;
if (queryParams.getOffset() != 0) {
return Mono.error(new IllegalArgumentException("MultiLuceneIndex requires an offset equal to 0"));
}
if (luceneIndices.length <= 1 || !queryParams.getScoreMode().getComputeScores()) { if (luceneIndices.length <= 1 || !queryParams.getScoreMode().getComputeScores()) {
actionId = -1; actionId = -1;
scoreDivisor = 1; scoreDivisor = 1;
@ -410,4 +416,9 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
public boolean isLowMemoryMode() { public boolean isLowMemoryMode() {
return luceneIndices[0].isLowMemoryMode(); return luceneIndices[0].isLowMemoryMode();
} }
@Override
public boolean supportsOffset() {
return false;
}
} }

View File

@ -181,6 +181,7 @@ public class LuceneUtils {
*/ */
public static <T> Flux<T> mergeStream(Flux<Flux<T>> mappedMultiResults, public static <T> Flux<T> mergeStream(Flux<Flux<T>> mappedMultiResults,
@Nullable MultiSort<T> sort, @Nullable MultiSort<T> sort,
long offset,
@Nullable Long limit) { @Nullable Long limit) {
if (limit != null && limit == 0) { if (limit != null && limit == 0) {
return mappedMultiResults.flatMap(f -> f).ignoreElements().flux(); return mappedMultiResults.flatMap(f -> f).ignoreElements().flux();
@ -193,10 +194,16 @@ public class LuceneUtils {
//noinspection unchecked //noinspection unchecked
mergedFlux = Flux.mergeOrdered(32, sort.getResultSort(), mappedMultiResultsList.toArray(Flux[]::new)); mergedFlux = Flux.mergeOrdered(32, sort.getResultSort(), mappedMultiResultsList.toArray(Flux[]::new));
} }
if (limit == null || limit == Long.MAX_VALUE) { Flux<T> offsetedFlux;
return mergedFlux; if (offset > 0) {
offsetedFlux = mergedFlux.skip(offset);
} else { } else {
return mergedFlux.limitRequest(limit); offsetedFlux = mergedFlux;
}
if (limit == null || limit == Long.MAX_VALUE) {
return offsetedFlux;
} else {
return offsetedFlux.limitRequest(limit);
} }
}); });
} }
@ -236,31 +243,34 @@ public class LuceneUtils {
public static <T> Mono<SearchResultKeys<T>> mergeSignalStreamKeys(Flux<SearchResultKeys<T>> mappedKeys, public static <T> Mono<SearchResultKeys<T>> mergeSignalStreamKeys(Flux<SearchResultKeys<T>> mappedKeys,
MultiSort<SearchResultKey<T>> sort, MultiSort<SearchResultKey<T>> sort,
long offset,
Long limit) { Long limit) {
return mappedKeys.reduce( return mappedKeys.reduce(
new SearchResultKeys<>(Flux.empty(), 0L), new SearchResultKeys<>(Flux.empty(), 0L),
(a, b) -> new SearchResultKeys<T>(LuceneUtils (a, b) -> new SearchResultKeys<T>(LuceneUtils
.mergeStream(Flux.just(a.getResults(), b.getResults()), sort, limit), a.getTotalHitsCount() + b.getTotalHitsCount()) .mergeStream(Flux.just(a.getResults(), b.getResults()), sort, offset, limit), a.getTotalHitsCount() + b.getTotalHitsCount())
); );
} }
public static <T, U> Mono<SearchResult<T, U>> mergeSignalStreamItems(Flux<SearchResult<T, U>> mappedKeys, public static <T, U> Mono<SearchResult<T, U>> mergeSignalStreamItems(Flux<SearchResult<T, U>> mappedKeys,
MultiSort<SearchResultItem<T, U>> sort, MultiSort<SearchResultItem<T, U>> sort,
long offset,
Long limit) { Long limit) {
return mappedKeys.reduce( return mappedKeys.reduce(
new SearchResult<>(Flux.empty(), 0L), new SearchResult<>(Flux.empty(), 0L),
(a, b) -> new SearchResult<T, U>(LuceneUtils (a, b) -> new SearchResult<T, U>(LuceneUtils
.mergeStream(Flux.just(a.getResults(), b.getResults()), sort, limit), a.getTotalHitsCount() + b.getTotalHitsCount()) .mergeStream(Flux.just(a.getResults(), b.getResults()), sort, offset, limit), a.getTotalHitsCount() + b.getTotalHitsCount())
); );
} }
public static Mono<LLSearchResultShard> mergeSignalStreamRaw(Flux<LLSearchResultShard> mappedKeys, public static Mono<LLSearchResultShard> mergeSignalStreamRaw(Flux<LLSearchResultShard> mappedKeys,
MultiSort<LLKeyScore> mappedSort, MultiSort<LLKeyScore> mappedSort,
long offset,
Long limit) { Long limit) {
return mappedKeys.reduce( return mappedKeys.reduce(
new LLSearchResultShard(Flux.empty(), 0), new LLSearchResultShard(Flux.empty(), 0),
(s1, s2) -> new LLSearchResultShard( (s1, s2) -> new LLSearchResultShard(
LuceneUtils.mergeStream(Flux.just(s1.getResults(), s2.getResults()), mappedSort, limit), LuceneUtils.mergeStream(Flux.just(s1.getResults(), s2.getResults()), mappedSort, offset, limit),
s1.getTotalHitsCount() + s2.getTotalHitsCount() s1.getTotalHitsCount() + s2.getTotalHitsCount()
) )
); );

View File

@ -1,7 +1,6 @@
package it.cavallium.dbengine.lucene.searcher; package it.cavallium.dbengine.lucene.searcher;
import java.io.IOException; import java.io.IOException;
import java.util.function.LongConsumer;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
@ -21,56 +20,52 @@ public class AdaptiveStreamSearcher implements LuceneStreamSearcher {
public AdaptiveStreamSearcher() { public AdaptiveStreamSearcher() {
this.simpleStreamSearcher = new SimpleStreamSearcher(); this.simpleStreamSearcher = new SimpleStreamSearcher();
this.parallelCollectorStreamSearcher = new ParallelCollectorStreamSearcher();
this.pagedStreamSearcher = new PagedStreamSearcher(simpleStreamSearcher);
this.countStreamSearcher = new CountStreamSearcher(); this.countStreamSearcher = new CountStreamSearcher();
this.parallelCollectorStreamSearcher = new ParallelCollectorStreamSearcher(countStreamSearcher);
this.pagedStreamSearcher = new PagedStreamSearcher(simpleStreamSearcher);
} }
@Override @Override
public void search(IndexSearcher indexSearcher, public LuceneSearchInstance search(IndexSearcher indexSearcher,
Query query, Query query,
int offset,
int limit, int limit,
@Nullable Sort luceneSort, @Nullable Sort luceneSort,
ScoreMode scoreMode, ScoreMode scoreMode,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName) throws IOException {
ResultItemConsumer consumer,
LongConsumer totalHitsConsumer) throws IOException {
if (limit == 0) { if (limit == 0) {
totalHitsConsumer.accept(countStreamSearcher.count(indexSearcher, query)); return countStreamSearcher.count(indexSearcher, query);
} else if (luceneSort == null && ENABLE_PARALLEL_COLLECTOR) { } else if (offset == 0 && luceneSort == null && ENABLE_PARALLEL_COLLECTOR) {
parallelCollectorStreamSearcher.search(indexSearcher, return parallelCollectorStreamSearcher.search(indexSearcher,
query, query,
offset,
limit, limit,
null, null,
scoreMode, scoreMode,
minCompetitiveScore, minCompetitiveScore,
keyFieldName, keyFieldName
consumer,
totalHitsConsumer
); );
} else { } else {
if (luceneSort != null && limit > PagedStreamSearcher.MAX_ITEMS_PER_PAGE) { if (offset > 0 || limit > PagedStreamSearcher.MAX_ITEMS_PER_PAGE) {
pagedStreamSearcher.search(indexSearcher, return pagedStreamSearcher.search(indexSearcher,
query, query,
offset,
limit, limit,
luceneSort, luceneSort,
scoreMode, scoreMode,
minCompetitiveScore, minCompetitiveScore,
keyFieldName, keyFieldName
consumer,
totalHitsConsumer
); );
} else { } else {
simpleStreamSearcher.search(indexSearcher, return simpleStreamSearcher.search(indexSearcher,
query, query,
offset,
limit, limit,
luceneSort, luceneSort,
scoreMode, scoreMode,
minCompetitiveScore, minCompetitiveScore,
keyFieldName, keyFieldName
consumer,
totalHitsConsumer
); );
} }
} }

View File

@ -2,7 +2,6 @@ package it.cavallium.dbengine.lucene.searcher;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.function.LongConsumer;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector; import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.CollectorManager;
@ -21,19 +20,18 @@ public class AllowOnlyQueryParsingCollectorStreamSearcher implements LuceneStrea
public void search(IndexSearcher indexSearcher, public void search(IndexSearcher indexSearcher,
Query query) throws IOException { Query query) throws IOException {
search(indexSearcher, query, 0, null, null, null, null, null, null); search(indexSearcher, query, 0, 0, null, null, null, null);
} }
@Override @Override
public void search(IndexSearcher indexSearcher, public LuceneSearchInstance search(IndexSearcher indexSearcher,
Query query, Query query,
int offset,
int limit, int limit,
@Nullable Sort luceneSort, @Nullable Sort luceneSort,
ScoreMode scoreMode, ScoreMode scoreMode,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName) throws IOException {
ResultItemConsumer resultsConsumer,
LongConsumer totalHitsConsumer) throws IOException {
if (limit > 0) { if (limit > 0) {
throw new IllegalArgumentException("Limit > 0 not allowed"); throw new IllegalArgumentException("Limit > 0 not allowed");
} }
@ -49,12 +47,6 @@ public class AllowOnlyQueryParsingCollectorStreamSearcher implements LuceneStrea
if (keyFieldName != null) { if (keyFieldName != null) {
throw new IllegalArgumentException("Key field name not allowed"); throw new IllegalArgumentException("Key field name not allowed");
} }
if (resultsConsumer != null) {
throw new IllegalArgumentException("Results consumer not allowed");
}
if (totalHitsConsumer != null) {
throw new IllegalArgumentException("Total hits consumer not allowed");
}
indexSearcher.search(query, new CollectorManager<>() { indexSearcher.search(query, new CollectorManager<>() {
@Override @Override
public Collector newCollector() { public Collector newCollector() {
@ -85,5 +77,17 @@ public class AllowOnlyQueryParsingCollectorStreamSearcher implements LuceneStrea
return null; return null;
} }
}); });
return new LuceneSearchInstance() {
@Override
public long getTotalHitsCount() throws IOException {
throw new IllegalArgumentException("Total hits consumer not allowed");
}
@Override
public void getResults(ResultItemConsumer consumer) throws IOException {
throw new IllegalArgumentException("Results consumer not allowed");
}
};
} }
} }

View File

@ -1,7 +1,6 @@
package it.cavallium.dbengine.lucene.searcher; package it.cavallium.dbengine.lucene.searcher;
import java.io.IOException; import java.io.IOException;
import java.util.function.LongConsumer;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
@ -14,31 +13,42 @@ import org.jetbrains.annotations.Nullable;
public class CountStreamSearcher implements LuceneStreamSearcher { public class CountStreamSearcher implements LuceneStreamSearcher {
@Override @Override
public void search(IndexSearcher indexSearcher, public LuceneSearchInstance search(IndexSearcher indexSearcher,
Query query, Query query,
int offset,
int limit, int limit,
@Nullable Sort luceneSort, @Nullable Sort luceneSort,
ScoreMode scoreMode, ScoreMode scoreMode,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName) throws IOException {
ResultItemConsumer resultsConsumer,
LongConsumer totalHitsConsumer) throws IOException {
if (limit != 0) { if (limit != 0) {
throw new IllegalArgumentException("CountStream doesn't support a limit different than 0"); throw new IllegalArgumentException("CountStream doesn't support a limit different than 0");
} }
if (luceneSort != null) { if (luceneSort != null) {
throw new IllegalArgumentException("CountStream doesn't support sorting"); throw new IllegalArgumentException("CountStream doesn't support sorting");
} }
if (resultsConsumer != null) {
throw new IllegalArgumentException("CountStream doesn't support a results consumer");
}
if (keyFieldName != null) { if (keyFieldName != null) {
throw new IllegalArgumentException("CountStream doesn't support a key field"); throw new IllegalArgumentException("CountStream doesn't support a key field");
} }
totalHitsConsumer.accept(count(indexSearcher, query)); return count(indexSearcher, query);
} }
public long count(IndexSearcher indexSearcher, Query query) throws IOException { public long countLong(IndexSearcher indexSearcher, Query query) throws IOException {
return indexSearcher.count(query); return indexSearcher.count(query);
} }
public LuceneSearchInstance count(IndexSearcher indexSearcher, Query query) throws IOException {
long totalHitsCount = countLong(indexSearcher, query);
return new LuceneSearchInstance() {
@Override
public long getTotalHitsCount() {
return totalHitsCount;
}
@Override
public void getResults(ResultItemConsumer consumer) {
}
};
}
} }

View File

@ -0,0 +1,11 @@
package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.lucene.searcher.LuceneStreamSearcher.ResultItemConsumer;
import java.io.IOException;
public interface LuceneSearchInstance {
long getTotalHitsCount() throws IOException;
void getResults(ResultItemConsumer consumer) throws IOException;
}

View File

@ -2,7 +2,6 @@ package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.database.LLKeyScore; import it.cavallium.dbengine.database.LLKeyScore;
import java.io.IOException; import java.io.IOException;
import java.util.function.LongConsumer;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
@ -19,6 +18,7 @@ public interface LuceneStreamSearcher {
* Do a lucene query, receiving the single results using a consumer * Do a lucene query, receiving the single results using a consumer
* @param indexSearcher the index searcher, which contains all the lucene data * @param indexSearcher the index searcher, which contains all the lucene data
* @param query the query * @param query the query
* @param offset the offset of the first result (use 0 to disable offset)
* @param limit the maximum number of results * @param limit the maximum number of results
* @param luceneSort the sorting method used for the search * @param luceneSort the sorting method used for the search
* @param scoreMode score mode * @param scoreMode score mode
@ -28,15 +28,14 @@ public interface LuceneStreamSearcher {
* @param totalHitsConsumer the consumer of total count of results * @param totalHitsConsumer the consumer of total count of results
* @throws IOException thrown if there is an error * @throws IOException thrown if there is an error
*/ */
void search(IndexSearcher indexSearcher, LuceneSearchInstance search(IndexSearcher indexSearcher,
Query query, Query query,
int offset,
int limit, int limit,
@Nullable Sort luceneSort, @Nullable Sort luceneSort,
ScoreMode scoreMode, ScoreMode scoreMode,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName) throws IOException;
ResultItemConsumer resultsConsumer,
LongConsumer totalHitsConsumer) throws IOException;
@FunctionalInterface @FunctionalInterface
interface ResultItemConsumer { interface ResultItemConsumer {

View File

@ -2,7 +2,7 @@ package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import java.io.IOException; import java.io.IOException;
import java.util.function.LongConsumer; import java.util.concurrent.atomic.AtomicReference;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
@ -18,59 +18,106 @@ import org.warp.commonutils.type.IntWrapper;
public class PagedStreamSearcher implements LuceneStreamSearcher { public class PagedStreamSearcher implements LuceneStreamSearcher {
public static final int MAX_ITEMS_PER_PAGE = 1000; public static final int MAX_ITEMS_PER_PAGE = 1000;
private final LuceneStreamSearcher baseStreamSearcher; private final SimpleStreamSearcher simpleStreamSearcher;
public PagedStreamSearcher(LuceneStreamSearcher baseStreamSearcher) { public PagedStreamSearcher(SimpleStreamSearcher simpleStreamSearcher) {
this.baseStreamSearcher = baseStreamSearcher; this.simpleStreamSearcher = simpleStreamSearcher;
} }
@Override @Override
public void search(IndexSearcher indexSearcher, public LuceneSearchInstance search(IndexSearcher indexSearcher,
Query query, Query query,
int offset,
int limit, int limit,
@Nullable Sort luceneSort, @Nullable Sort luceneSort,
ScoreMode scoreMode, ScoreMode scoreMode,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName) throws IOException {
ResultItemConsumer resultsConsumer,
LongConsumer totalHitsConsumer) throws IOException {
if (limit < MAX_ITEMS_PER_PAGE) { if (limit < MAX_ITEMS_PER_PAGE) {
// Use a normal search method because the limit is low // Use a normal search method because the limit is low
baseStreamSearcher.search(indexSearcher, simpleStreamSearcher.search(indexSearcher,
query, query,
offset,
limit, limit,
luceneSort, luceneSort,
scoreMode, scoreMode,
minCompetitiveScore, minCompetitiveScore,
keyFieldName, keyFieldName
resultsConsumer,
totalHitsConsumer
); );
return;
} }
IntWrapper currentAllowedResults = new IntWrapper(limit); IntWrapper currentAllowedResults = new IntWrapper(limit);
// Run the first page search // Run the first page search
TopDocs lastTopDocs = indexSearcher.search(query, MAX_ITEMS_PER_PAGE, luceneSort, scoreMode != ScoreMode.COMPLETE_NO_SCORES); TopDocs firstTopDocsVal;
totalHitsConsumer.accept(lastTopDocs.totalHits.value); if (offset == 0) {
firstTopDocsVal = indexSearcher.search(query,
MAX_ITEMS_PER_PAGE,
luceneSort,
scoreMode != ScoreMode.COMPLETE_NO_SCORES
);
} else {
firstTopDocsVal = new TopDocsSearcher(indexSearcher,
query,
luceneSort,
MAX_ITEMS_PER_PAGE,
null,
scoreMode != ScoreMode.COMPLETE_NO_SCORES,
1000
).getTopDocs(offset, MAX_ITEMS_PER_PAGE);
}
AtomicReference<TopDocs> firstTopDocs = new AtomicReference<>(firstTopDocsVal);
long totalHitsCount = firstTopDocs.getPlain().totalHits.value;
return new LuceneSearchInstance() {
@Override
public long getTotalHitsCount() {
return totalHitsCount;
}
@Override
public void getResults(ResultItemConsumer resultsConsumer) throws IOException {
TopDocs lastTopDocs = firstTopDocs.getAndSet(null);
if (lastTopDocs.scoreDocs.length > 0) { if (lastTopDocs.scoreDocs.length > 0) {
ScoreDoc lastScoreDoc = getLastItem(lastTopDocs.scoreDocs); ScoreDoc lastScoreDoc = getLastItem(lastTopDocs.scoreDocs);
consumeHits(currentAllowedResults, lastTopDocs.scoreDocs, indexSearcher, minCompetitiveScore, keyFieldName, resultsConsumer); consumeHits(currentAllowedResults,
lastTopDocs.scoreDocs,
indexSearcher,
minCompetitiveScore,
keyFieldName,
resultsConsumer
);
// Run the searches for each page until the end // Run the searches for each page until the end
boolean finished = currentAllowedResults.var <= 0; boolean finished = currentAllowedResults.var <= 0;
while (!finished) { while (!finished) {
lastTopDocs = indexSearcher.searchAfter(lastScoreDoc, query, MAX_ITEMS_PER_PAGE, luceneSort, scoreMode != ScoreMode.COMPLETE_NO_SCORES); boolean halted;
lastTopDocs = indexSearcher.searchAfter(lastScoreDoc,
query,
MAX_ITEMS_PER_PAGE,
luceneSort,
scoreMode != ScoreMode.COMPLETE_NO_SCORES
);
if (lastTopDocs.scoreDocs.length > 0) { if (lastTopDocs.scoreDocs.length > 0) {
lastScoreDoc = getLastItem(lastTopDocs.scoreDocs); lastScoreDoc = getLastItem(lastTopDocs.scoreDocs);
consumeHits(currentAllowedResults, lastTopDocs.scoreDocs, indexSearcher, minCompetitiveScore, keyFieldName, resultsConsumer); halted = consumeHits(currentAllowedResults,
lastTopDocs.scoreDocs,
indexSearcher,
minCompetitiveScore,
keyFieldName,
resultsConsumer
) == HandleResult.HALT;
} else {
halted = false;
} }
if (lastTopDocs.scoreDocs.length < MAX_ITEMS_PER_PAGE || currentAllowedResults.var <= 0) { if (lastTopDocs.scoreDocs.length < MAX_ITEMS_PER_PAGE || currentAllowedResults.var <= 0 || halted) {
finished = true; finished = true;
} }
} }
} }
} }
};
}
private HandleResult consumeHits(IntWrapper currentAllowedResults, private HandleResult consumeHits(IntWrapper currentAllowedResults,
ScoreDoc[] hits, ScoreDoc[] hits,
@ -103,4 +150,5 @@ public class PagedStreamSearcher implements LuceneStreamSearcher {
private static ScoreDoc getLastItem(ScoreDoc[] scoreDocs) { private static ScoreDoc getLastItem(ScoreDoc[] scoreDocs) {
return scoreDocs[scoreDocs.length - 1]; return scoreDocs[scoreDocs.length - 1];
} }
} }

View File

@ -6,7 +6,6 @@ import it.cavallium.dbengine.lucene.LuceneParallelStreamCollectorResult;
import java.io.IOException; import java.io.IOException;
import java.util.Set; import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.LongConsumer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
@ -20,23 +19,43 @@ import org.jetbrains.annotations.Nullable;
*/ */
public class ParallelCollectorStreamSearcher implements LuceneStreamSearcher { public class ParallelCollectorStreamSearcher implements LuceneStreamSearcher {
private final CountStreamSearcher countStreamSearcher;
public ParallelCollectorStreamSearcher(CountStreamSearcher countStreamSearcher) {
this.countStreamSearcher = countStreamSearcher;
}
@Override @Override
public void search(IndexSearcher indexSearcher, public LuceneSearchInstance search(IndexSearcher indexSearcher,
Query query, Query query,
int offset,
int limit, int limit,
@Nullable Sort luceneSort, @Nullable Sort luceneSort,
ScoreMode scoreMode, ScoreMode scoreMode,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName) throws IOException {
ResultItemConsumer resultsConsumer, if (offset != 0) {
LongConsumer totalHitsConsumer) throws IOException { throw new IllegalArgumentException("ParallelCollectorStreamSearcher doesn't support a offset different than 0");
}
if (luceneSort != null) { if (luceneSort != null) {
throw new IllegalArgumentException("ParallelCollectorStreamSearcher doesn't support sorted searches"); throw new IllegalArgumentException("ParallelCollectorStreamSearcher doesn't support sorted searches");
} }
return new LuceneSearchInstance() {
long totalHitsCount = countStreamSearcher.countLong(indexSearcher, query);
@Override
public long getTotalHitsCount() throws IOException {
return totalHitsCount;
}
@Override
public void getResults(ResultItemConsumer resultsConsumer) throws IOException {
AtomicInteger currentCount = new AtomicInteger(); AtomicInteger currentCount = new AtomicInteger();
LuceneParallelStreamCollectorResult result = indexSearcher.search(query, LuceneParallelStreamCollectorManager.fromConsumer(scoreMode, minCompetitiveScore, (docId, score) -> { LuceneParallelStreamCollectorResult result = indexSearcher.search(query,
LuceneParallelStreamCollectorManager.fromConsumer(scoreMode, minCompetitiveScore, (docId, score) -> {
if (currentCount.getAndIncrement() >= limit) { if (currentCount.getAndIncrement() >= limit) {
return HandleResult.HALT; return HandleResult.HALT;
} else { } else {
@ -63,7 +82,8 @@ public class ParallelCollectorStreamSearcher implements LuceneStreamSearcher {
return HandleResult.CONTINUE; return HandleResult.CONTINUE;
} }
})); }));
//todo: check the accuracy of our hits counter! this.totalHitsCount = result.getTotalHitsCount();
totalHitsConsumer.accept(result.getTotalHitsCount()); }
};
} }
} }

View File

@ -3,13 +3,11 @@ package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList; import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.io.IOException; import java.io.IOException;
import java.util.function.LongConsumer;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.Nullable;
/** /**
@ -18,23 +16,31 @@ import org.jetbrains.annotations.Nullable;
public class SimpleStreamSearcher implements LuceneStreamSearcher { public class SimpleStreamSearcher implements LuceneStreamSearcher {
@Override @Override
public void search(IndexSearcher indexSearcher, public LuceneSearchInstance search(IndexSearcher indexSearcher,
Query query, Query query,
int offset,
int limit, int limit,
@Nullable Sort luceneSort, @Nullable Sort luceneSort,
ScoreMode scoreMode, ScoreMode scoreMode,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName) throws IOException {
ResultItemConsumer resultsConsumer, var searcher = new TopDocsSearcher(indexSearcher,
LongConsumer totalHitsConsumer) throws IOException { query,
TopDocs topDocs; luceneSort,
if (luceneSort != null) { offset + limit,
topDocs = indexSearcher.search(query, limit, luceneSort, scoreMode != ScoreMode.COMPLETE_NO_SCORES); null,
} else { scoreMode != ScoreMode.COMPLETE_NO_SCORES,
topDocs = indexSearcher.search(query, limit); 1000
);
return new LuceneSearchInstance() {
@Override
public long getTotalHitsCount() throws IOException {
return searcher.getTopDocs(0, 1).totalHits.value;
} }
totalHitsConsumer.accept(topDocs.totalHits.value);
var hits = ObjectArrayList.wrap(topDocs.scoreDocs); @Override
public void getResults(ResultItemConsumer resultsConsumer) throws IOException {
ObjectArrayList<ScoreDoc> hits = ObjectArrayList.wrap(searcher.getTopDocs(offset, limit).scoreDocs);
for (ScoreDoc hit : hits) { for (ScoreDoc hit : hits) {
int docId = hit.doc; int docId = hit.doc;
float score = hit.score; float score = hit.score;
@ -50,4 +56,6 @@ public class SimpleStreamSearcher implements LuceneStreamSearcher {
} }
} }
} }
};
}
} }

View File

@ -0,0 +1,45 @@
package it.cavallium.dbengine.lucene.searcher;
import java.io.IOException;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
class TopDocsSearcher {
private final TopDocsCollector<?> collector;
private final boolean doDocScores;
private final IndexSearcher indexSearcher;
private final Query query;
public TopDocsSearcher(IndexSearcher indexSearcher,
Query query,
Sort luceneSort,
int limit,
FieldDoc after,
boolean doDocScores,
int totalHitsThreshold) throws IOException {
if (luceneSort == null) {
this.collector = TopScoreDocCollector.create(limit, after, totalHitsThreshold);
} else {
this.collector = TopFieldCollector.create(luceneSort, limit, after, totalHitsThreshold);
}
this.indexSearcher = indexSearcher;
this.query = query;
this.doDocScores = doDocScores;
indexSearcher.search(query, collector);
}
public TopDocs getTopDocs(int offset, int length) throws IOException {
TopDocs topDocs = collector.topDocs(offset, length);
if (doDocScores) {
TopFieldCollector.populateScores(topDocs.scoreDocs, indexSearcher, query);
}
return topDocs;
}
}