Total hits with precision

This commit is contained in:
Andrea Cavalli 2021-08-04 01:12:39 +02:00
parent bcd99f4727
commit 9734d78839
16 changed files with 49 additions and 19 deletions

View File

@ -196,3 +196,7 @@ versions:
data: data:
onlyTopScores: boolean onlyTopScores: boolean
computeScores: boolean computeScores: boolean
TotalHitsCount:
data:
value: long
exact: boolean

View File

@ -2,6 +2,7 @@ package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.ClientQueryParams; import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.current.data.Query; import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.Delta; import it.cavallium.dbengine.database.Delta;
import it.cavallium.dbengine.database.LLSnapshottable; import it.cavallium.dbengine.database.LLSnapshottable;
import it.cavallium.dbengine.database.collections.ValueGetter; import it.cavallium.dbengine.database.collections.ValueGetter;
@ -80,7 +81,7 @@ public interface LuceneIndex<T, U> extends LLSnapshottable {
Mono<SearchResult<T, U>> searchWithTransformer(ClientQueryParams<SearchResultItem<T, U>> queryParams, Mono<SearchResult<T, U>> searchWithTransformer(ClientQueryParams<SearchResultItem<T, U>> queryParams,
ValueTransformer<T, U> valueTransformer); ValueTransformer<T, U> valueTransformer);
Mono<Long> count(@Nullable CompositeSnapshot snapshot, Query query); Mono<TotalHitsCount> count(@Nullable CompositeSnapshot snapshot, Query query);
boolean isLowMemoryMode(); boolean isLowMemoryMode();

View File

@ -2,6 +2,7 @@ package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.ClientQueryParams; import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.current.data.Query; import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore; import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLLuceneIndex; import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard; import it.cavallium.dbengine.database.LLSearchResultShard;
@ -190,7 +191,7 @@ public class LuceneIndexImpl<T, U> implements LuceneIndex<T, U> {
} }
@Override @Override
public Mono<Long> count(@Nullable CompositeSnapshot snapshot, Query query) { public Mono<TotalHitsCount> count(@Nullable CompositeSnapshot snapshot, Query query) {
return this return this
.search(ClientQueryParams.<SearchResultKey<T>>builder().snapshot(snapshot).query(query).limit(0).build()) .search(ClientQueryParams.<SearchResultKey<T>>builder().snapshot(snapshot).query(query).limit(0).build())
.flatMap(tSearchResultKeys -> tSearchResultKeys.release().thenReturn(tSearchResultKeys.totalHitsCount())); .flatMap(tSearchResultKeys -> tSearchResultKeys.release().thenReturn(tSearchResultKeys.totalHitsCount()));

View File

@ -1,12 +1,13 @@
package it.cavallium.dbengine.client; package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import reactor.core.publisher.Flux; import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
public record SearchResult<T, U>(Flux<SearchResultItem<T, U>> results, long totalHitsCount, Mono<Void> release) { public record SearchResult<T, U>(Flux<SearchResultItem<T, U>> results, TotalHitsCount totalHitsCount, Mono<Void> release) {
public static <T, U> SearchResult<T, U> empty() { public static <T, U> SearchResult<T, U> empty() {
return new SearchResult<>(Flux.empty(), 0L, Mono.empty()); return new SearchResult<>(Flux.empty(), TotalHitsCount.of(0, true), Mono.empty());
} }
public Flux<SearchResultItem<T, U>> resultsThenRelease() { public Flux<SearchResultItem<T, U>> resultsThenRelease() {

View File

@ -1,15 +1,16 @@
package it.cavallium.dbengine.client; package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.collections.ValueGetter; import it.cavallium.dbengine.database.collections.ValueGetter;
import org.reactivestreams.Publisher; import org.reactivestreams.Publisher;
import reactor.core.publisher.Flux; import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
@SuppressWarnings("unused") @SuppressWarnings("unused")
public record SearchResultKeys<T>(Flux<SearchResultKey<T>> results, long totalHitsCount, Mono<Void> release) { public record SearchResultKeys<T>(Flux<SearchResultKey<T>> results, TotalHitsCount totalHitsCount, Mono<Void> release) {
public static <T, U> SearchResultKeys<T> empty() { public static <T, U> SearchResultKeys<T> empty() {
return new SearchResultKeys<>(Flux.empty(), 0L, Mono.empty()); return new SearchResultKeys<>(Flux.empty(), TotalHitsCount.of(0, true), Mono.empty());
} }
public <U> SearchResult<T, U> withValues(ValueGetter<T, U> valuesGetter) { public <U> SearchResult<T, U> withValues(ValueGetter<T, U> valuesGetter) {

View File

@ -5,6 +5,7 @@ import it.cavallium.dbengine.client.query.current.data.NoSort;
import it.cavallium.dbengine.client.query.current.data.Query; import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams; import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.ScoreMode; import it.cavallium.dbengine.client.query.current.data.ScoreMode;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -51,11 +52,11 @@ public interface LLLuceneIndex extends LLSnapshottable {
*/ */
Mono<LLSearchResultShard> search(@Nullable LLSnapshot snapshot, QueryParams queryParams, String keyFieldName); Mono<LLSearchResultShard> search(@Nullable LLSnapshot snapshot, QueryParams queryParams, String keyFieldName);
default Mono<Long> count(@Nullable LLSnapshot snapshot, Query query) { default Mono<TotalHitsCount> count(@Nullable LLSnapshot snapshot, Query query) {
QueryParams params = QueryParams.of(query, 0, 0, Nullablefloat.empty(), NoSort.of(), ScoreMode.of(false, false)); QueryParams params = QueryParams.of(query, 0, 0, Nullablefloat.empty(), NoSort.of(), ScoreMode.of(false, false));
return Mono.from(this.search(snapshot, params, null) return Mono.from(this.search(snapshot, params, null)
.flatMap(llSearchResultShard -> llSearchResultShard.release().thenReturn(llSearchResultShard.totalHitsCount())) .flatMap(llSearchResultShard -> llSearchResultShard.release().thenReturn(llSearchResultShard.totalHitsCount()))
.defaultIfEmpty(0L)); .defaultIfEmpty(TotalHitsCount.of(0, true)));
} }
boolean isLowMemoryMode(); boolean isLowMemoryMode();

View File

@ -1,6 +1,7 @@
package it.cavallium.dbengine.database; package it.cavallium.dbengine.database;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import reactor.core.publisher.Flux; import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
public record LLSearchResultShard (Flux<LLKeyScore> results, long totalHitsCount, Mono<Void> release) {} public record LLSearchResultShard (Flux<LLKeyScore> results, TotalHitsCount totalHitsCount, Mono<Void> release) {}

View File

@ -469,8 +469,8 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
private ColumnFamilyHandle getCfh(byte[] columnName) throws RocksDBException { private ColumnFamilyHandle getCfh(byte[] columnName) throws RocksDBException {
ColumnFamilyHandle cfh = handles.get(Column.special(Column.toString(columnName))); ColumnFamilyHandle cfh = handles.get(Column.special(Column.toString(columnName)));
//noinspection RedundantIfStatement
if (databaseOptions.enableDbAssertionsWhenUsingAssertions()) { if (databaseOptions.enableDbAssertionsWhenUsingAssertions()) {
//noinspection RedundantIfStatement
if (!enableColumnsBug) { if (!enableColumnsBug) {
assert Arrays.equals(cfh.getName(), columnName); assert Arrays.equals(cfh.getName(), columnName);
} }

View File

@ -6,6 +6,7 @@ import it.cavallium.dbengine.client.IndicizerSimilarities;
import it.cavallium.dbengine.client.query.BasicType; import it.cavallium.dbengine.client.query.BasicType;
import it.cavallium.dbengine.client.query.QueryParser; import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams; import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore; import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLScoreMode; import it.cavallium.dbengine.database.LLScoreMode;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionary; import it.cavallium.dbengine.database.collections.DatabaseMapDictionary;
@ -50,6 +51,7 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
@ -463,4 +465,17 @@ public class LuceneUtils {
public static int totalHitsThreshold() { public static int totalHitsThreshold() {
return 1; return 1;
} }
/**
 * Converts a Lucene {@link TotalHits} into the engine's {@link TotalHitsCount}.
 *
 * <p>The hit value is copied as-is. The result is flagged as exact only when the
 * Lucene relation is {@code EQUAL_TO}; {@code GREATER_THAN_OR_EQUAL_TO} yields a
 * non-exact count.
 *
 * @param totalHits the Lucene total hits to convert
 * @return a count carrying the same value and the matching exactness flag
 */
public static TotalHitsCount convertTotalHitsCount(TotalHits totalHits) {
	// Exhaustive switch: a new relation constant must be handled here explicitly.
	final boolean exact = switch (totalHits.relation) {
		case EQUAL_TO -> true;
		case GREATER_THAN_OR_EQUAL_TO -> false;
	};
	return TotalHitsCount.of(totalHits.value, exact);
}
/**
 * Adds two hit counts together.
 *
 * <p>The values are combined with plain {@code long} addition (no overflow check).
 * The result is exact only if both operands are exact.
 *
 * @param totalHitsCount the first count
 * @param totalHitsCount1 the second count
 * @return a new count holding the summed value and the combined exactness flag
 */
public static TotalHitsCount sum(TotalHitsCount totalHitsCount, TotalHitsCount totalHitsCount1) {
	final long combinedValue = totalHitsCount.value() + totalHitsCount1.value();
	// A single inexact operand makes the whole sum inexact.
	final boolean combinedExact = totalHitsCount.exact() && totalHitsCount1.exact();
	return TotalHitsCount.of(combinedValue, combinedExact);
}
} }

View File

@ -2,6 +2,7 @@ package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.QueryParser; import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams; import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import reactor.core.publisher.Flux; import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
@ -18,7 +19,7 @@ public class CountLuceneLocalSearcher implements LuceneLocalSearcher {
//noinspection BlockingMethodInNonBlockingContext //noinspection BlockingMethodInNonBlockingContext
return Mono return Mono
.fromCallable(() -> new LuceneSearchResult( .fromCallable(() -> new LuceneSearchResult(
indexSearcher.count(queryParams.query()), TotalHitsCount.of(indexSearcher.count(queryParams.query()), true),
Flux.empty(), Flux.empty(),
releaseIndexSearcher) releaseIndexSearcher)
) )

View File

@ -2,7 +2,9 @@ package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.QueryParser; import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams; import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
@ -36,7 +38,7 @@ public class CountLuceneMultiSearcher implements LuceneMultiSearcher {
@Override @Override
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) { public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) {
return Mono.fromCallable(() -> new LuceneSearchResult(totalHits.get(), Flux.empty(), Mono.when(release))); return Mono.fromCallable(() -> new LuceneSearchResult(TotalHitsCount.of(totalHits.get(), true), Flux.empty(), Mono.when(release)));
} }
}; };
}); });

View File

@ -1,5 +1,6 @@
package it.cavallium.dbengine.lucene.searcher; package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore; import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.disk.LLLocalKeyValueDatabase; import it.cavallium.dbengine.database.disk.LLLocalKeyValueDatabase;
import java.io.IOException; import java.io.IOException;
@ -14,11 +15,11 @@ public final class LuceneSearchResult {
protected static final Logger logger = LoggerFactory.getLogger(LuceneSearchResult.class); protected static final Logger logger = LoggerFactory.getLogger(LuceneSearchResult.class);
private volatile boolean releaseCalled; private volatile boolean releaseCalled;
private final long totalHitsCount; private final TotalHitsCount totalHitsCount;
private final Flux<LLKeyScore> results; private final Flux<LLKeyScore> results;
private final Mono<Void> release; private final Mono<Void> release;
public LuceneSearchResult(long totalHitsCount, Flux<LLKeyScore> results, Mono<Void> release) { public LuceneSearchResult(TotalHitsCount totalHitsCount, Flux<LLKeyScore> results, Mono<Void> release) {
this.totalHitsCount = totalHitsCount; this.totalHitsCount = totalHitsCount;
this.results = results; this.results = results;
this.release = Mono.fromRunnable(() -> { this.release = Mono.fromRunnable(() -> {
@ -38,7 +39,7 @@ public final class LuceneSearchResult {
super.finalize(); super.finalize();
} }
public long totalHitsCount() { public TotalHitsCount totalHitsCount() {
return totalHitsCount; return totalHitsCount;
} }

View File

@ -140,7 +140,7 @@ class ScoredSimpleLuceneShardSearcher implements LuceneShardSearcher {
); );
}); });
return new LuceneSearchResult(result.totalHits.value, return new LuceneSearchResult(LuceneUtils.convertTotalHitsCount(result.totalHits),
firstPageHits firstPageHits
.concatWith(nextHits), .concatWith(nextHits),
//.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)), //.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)),

View File

@ -111,7 +111,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
combinedFlux = firstPageMono; combinedFlux = firstPageMono;
} }
return new LuceneSearchResult(firstPageTopDocs.totalHits.value, combinedFlux, return new LuceneSearchResult(LuceneUtils.convertTotalHitsCount(firstPageTopDocs.totalHits), combinedFlux,
//.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)), //.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)),
releaseIndexSearcher releaseIndexSearcher
); );

View File

@ -128,7 +128,7 @@ class UnscoredPagedLuceneShardSearcher implements LuceneShardSearcher {
); );
}); });
return new LuceneSearchResult(result.totalHits.value, firstPageHits return new LuceneSearchResult(LuceneUtils.convertTotalHitsCount(result.totalHits), firstPageHits
.concatWith(nextHits), .concatWith(nextHits),
//.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)), //.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)),
release release

View File

@ -1,5 +1,6 @@
package it.cavallium.dbengine.lucene.searcher; package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList; import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.io.IOException; import java.io.IOException;
@ -159,7 +160,7 @@ public class UnscoredUnsortedContinuousLuceneMultiSearcher implements LuceneMult
false false
)); ));
return new LuceneSearchResult(1, resultsFlux, release); return new LuceneSearchResult(TotalHitsCount.of(0, false), resultsFlux, release);
}); });
} }
}; };