Total hits with precision

This commit is contained in:
Andrea Cavalli 2021-08-04 01:12:39 +02:00
parent bcd99f4727
commit 9734d78839
16 changed files with 49 additions and 19 deletions

View File

@ -196,3 +196,7 @@ versions:
data:
onlyTopScores: boolean
computeScores: boolean
TotalHitsCount:
data:
value: long
exact: boolean

View File

@ -2,6 +2,7 @@ package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.Delta;
import it.cavallium.dbengine.database.LLSnapshottable;
import it.cavallium.dbengine.database.collections.ValueGetter;
@ -80,7 +81,7 @@ public interface LuceneIndex<T, U> extends LLSnapshottable {
Mono<SearchResult<T, U>> searchWithTransformer(ClientQueryParams<SearchResultItem<T, U>> queryParams,
ValueTransformer<T, U> valueTransformer);
Mono<Long> count(@Nullable CompositeSnapshot snapshot, Query query);
Mono<TotalHitsCount> count(@Nullable CompositeSnapshot snapshot, Query query);
boolean isLowMemoryMode();

View File

@ -2,6 +2,7 @@ package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard;
@ -190,7 +191,7 @@ public class LuceneIndexImpl<T, U> implements LuceneIndex<T, U> {
}
@Override
public Mono<Long> count(@Nullable CompositeSnapshot snapshot, Query query) {
public Mono<TotalHitsCount> count(@Nullable CompositeSnapshot snapshot, Query query) {
return this
.search(ClientQueryParams.<SearchResultKey<T>>builder().snapshot(snapshot).query(query).limit(0).build())
.flatMap(tSearchResultKeys -> tSearchResultKeys.release().thenReturn(tSearchResultKeys.totalHitsCount()));

View File

@ -1,12 +1,13 @@
package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
public record SearchResult<T, U>(Flux<SearchResultItem<T, U>> results, long totalHitsCount, Mono<Void> release) {
public record SearchResult<T, U>(Flux<SearchResultItem<T, U>> results, TotalHitsCount totalHitsCount, Mono<Void> release) {
public static <T, U> SearchResult<T, U> empty() {
return new SearchResult<>(Flux.empty(), 0L, Mono.empty());
return new SearchResult<>(Flux.empty(), TotalHitsCount.of(0, true), Mono.empty());
}
public Flux<SearchResultItem<T, U>> resultsThenRelease() {

View File

@ -1,15 +1,16 @@
package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.collections.ValueGetter;
import org.reactivestreams.Publisher;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
@SuppressWarnings("unused")
public record SearchResultKeys<T>(Flux<SearchResultKey<T>> results, long totalHitsCount, Mono<Void> release) {
public record SearchResultKeys<T>(Flux<SearchResultKey<T>> results, TotalHitsCount totalHitsCount, Mono<Void> release) {
public static <T, U> SearchResultKeys<T> empty() {
return new SearchResultKeys<>(Flux.empty(), 0L, Mono.empty());
return new SearchResultKeys<>(Flux.empty(), TotalHitsCount.of(0, true), Mono.empty());
}
public <U> SearchResult<T, U> withValues(ValueGetter<T, U> valuesGetter) {

View File

@ -5,6 +5,7 @@ import it.cavallium.dbengine.client.query.current.data.NoSort;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.ScoreMode;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.lucene.LuceneUtils;
import java.util.List;
import java.util.Map;
@ -51,11 +52,11 @@ public interface LLLuceneIndex extends LLSnapshottable {
*/
Mono<LLSearchResultShard> search(@Nullable LLSnapshot snapshot, QueryParams queryParams, String keyFieldName);
/**
 * Counts the hits for {@code query} by delegating to
 * {@code search(snapshot, params, null)} and keeping only the total hits count
 * of the returned shard.
 *
 * @param snapshot snapshot to search on, may be {@code null}
 * @param query    query whose hits are counted
 * @return a {@link Mono} emitting the total hits count reported by the search,
 *         or a zero count when the search completes without emitting a shard
 */
default Mono<Long> count(@Nullable LLSnapshot snapshot, Query query) {
default Mono<TotalHitsCount> count(@Nullable LLSnapshot snapshot, Query query) {
// The two zeroes are presumably offset and limit, so that no document is
// fetched -- TODO confirm against QueryParams.of. No sort is requested and
// both ScoreMode flags are false.
QueryParams params = QueryParams.of(query, 0, 0, Nullablefloat.empty(), NoSort.of(), ScoreMode.of(false, false));
// The key field name is passed as null. The shard is released first, and only
// then is its total hits count emitted downstream.
return Mono.from(this.search(snapshot, params, null)
.flatMap(llSearchResultShard -> llSearchResultShard.release().thenReturn(llSearchResultShard.totalHitsCount()))
.defaultIfEmpty(0L));
.defaultIfEmpty(TotalHitsCount.of(0, true)));
}
boolean isLowMemoryMode();

View File

@ -1,6 +1,7 @@
package it.cavallium.dbengine.database;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
/**
 * Result of a search on one shard: the stream of {@link LLKeyScore} results,
 * the total hits count, and a {@code release} {@link Mono}.
 * NOTE(review): {@code release} presumably frees the resources backing
 * {@code results} and must be subscribed by the consumer -- confirm with callers.
 */
public record LLSearchResultShard (Flux<LLKeyScore> results, long totalHitsCount, Mono<Void> release) {}
public record LLSearchResultShard (Flux<LLKeyScore> results, TotalHitsCount totalHitsCount, Mono<Void> release) {}

View File

@ -469,8 +469,8 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
private ColumnFamilyHandle getCfh(byte[] columnName) throws RocksDBException {
ColumnFamilyHandle cfh = handles.get(Column.special(Column.toString(columnName)));
//noinspection RedundantIfStatement
if (databaseOptions.enableDbAssertionsWhenUsingAssertions()) {
//noinspection RedundantIfStatement
if (!enableColumnsBug) {
assert Arrays.equals(cfh.getName(), columnName);
}

View File

@ -6,6 +6,7 @@ import it.cavallium.dbengine.client.IndicizerSimilarities;
import it.cavallium.dbengine.client.query.BasicType;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLScoreMode;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionary;
@ -50,6 +51,7 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
@ -463,4 +465,17 @@ public class LuceneUtils {
/**
 * Returns the total hits threshold, currently the constant {@code 1}.
 * NOTE(review): presumably handed to Lucene collectors as the number of hits
 * to count accurately before the count becomes a lower bound -- confirm at the
 * call sites.
 *
 * @return the threshold value, always {@code 1}
 */
public static int totalHitsThreshold() {
return 1;
}
/**
 * Converts a Lucene {@link TotalHits} into a {@link TotalHitsCount}.
 *
 * <p>The hit value is copied unchanged. The resulting count is flagged as exact
 * only when the relation is {@code EQUAL_TO}; with
 * {@code GREATER_THAN_OR_EQUAL_TO} the value is flagged as not exact.
 *
 * @param totalHits the Lucene total hits to convert
 * @return the equivalent {@link TotalHitsCount}
 */
public static TotalHitsCount convertTotalHitsCount(TotalHits totalHits) {
	// The switch has no default branch on purpose: it must list every relation
	// constant, so a new constant is a compile error instead of a silent guess.
	final boolean exact = switch (totalHits.relation) {
		case EQUAL_TO -> true;
		case GREATER_THAN_OR_EQUAL_TO -> false;
	};
	return TotalHitsCount.of(totalHits.value, exact);
}
/**
 * Adds two total hits counts.
 *
 * <p>The values are added together; the result is flagged as exact only when
 * both operands are exact.
 *
 * @param totalHitsCount  the first count
 * @param totalHitsCount1 the second count
 * @return a new {@link TotalHitsCount} holding the summed value and the combined
 *         exactness flag
 */
public static TotalHitsCount sum(TotalHitsCount totalHitsCount, TotalHitsCount totalHitsCount1) {
	final long combinedValue = totalHitsCount.value() + totalHitsCount1.value();
	// A single inexact operand makes the whole sum inexact.
	final boolean bothExact = totalHitsCount.exact() && totalHitsCount1.exact();
	return TotalHitsCount.of(combinedValue, bothExact);
}
}

View File

@ -2,6 +2,7 @@ package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import org.apache.lucene.search.IndexSearcher;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
@ -18,7 +19,7 @@ public class CountLuceneLocalSearcher implements LuceneLocalSearcher {
//noinspection BlockingMethodInNonBlockingContext
return Mono
.fromCallable(() -> new LuceneSearchResult(
indexSearcher.count(queryParams.query()),
TotalHitsCount.of(indexSearcher.count(queryParams.query()), true),
Flux.empty(),
releaseIndexSearcher)
)

View File

@ -2,7 +2,9 @@ package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -36,7 +38,7 @@ public class CountLuceneMultiSearcher implements LuceneMultiSearcher {
/**
 * Builds the search result of this count-only searcher.
 *
 * <p>The result is created lazily inside {@code Mono.fromCallable}: it carries
 * the value read from {@code totalHits} at that moment (flagged as exact), an
 * empty results flux, and a release {@link Mono} built with
 * {@code Mono.when(release)}. None of the three parameters is read by this
 * implementation.
 * NOTE(review): {@code totalHits} and {@code release} are defined outside this
 * hunk; presumably a shared counter and the per-shard release publishers --
 * confirm in the enclosing method.
 *
 * @return a {@link Mono} emitting the count-only {@code LuceneSearchResult}
 */
@Override
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) {
return Mono.fromCallable(() -> new LuceneSearchResult(totalHits.get(), Flux.empty(), Mono.when(release)));
return Mono.fromCallable(() -> new LuceneSearchResult(TotalHitsCount.of(totalHits.get(), true), Flux.empty(), Mono.when(release)));
}
};
});

View File

@ -1,5 +1,6 @@
package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.disk.LLLocalKeyValueDatabase;
import java.io.IOException;
@ -14,11 +15,11 @@ public final class LuceneSearchResult {
protected static final Logger logger = LoggerFactory.getLogger(LuceneSearchResult.class);
private volatile boolean releaseCalled;
private final long totalHitsCount;
private final TotalHitsCount totalHitsCount;
private final Flux<LLKeyScore> results;
private final Mono<Void> release;
public LuceneSearchResult(long totalHitsCount, Flux<LLKeyScore> results, Mono<Void> release) {
public LuceneSearchResult(TotalHitsCount totalHitsCount, Flux<LLKeyScore> results, Mono<Void> release) {
this.totalHitsCount = totalHitsCount;
this.results = results;
this.release = Mono.fromRunnable(() -> {
@ -38,7 +39,7 @@ public final class LuceneSearchResult {
super.finalize();
}
public long totalHitsCount() {
public TotalHitsCount totalHitsCount() {
return totalHitsCount;
}

View File

@ -140,7 +140,7 @@ class ScoredSimpleLuceneShardSearcher implements LuceneShardSearcher {
);
});
return new LuceneSearchResult(result.totalHits.value,
return new LuceneSearchResult(LuceneUtils.convertTotalHitsCount(result.totalHits),
firstPageHits
.concatWith(nextHits),
//.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)),

View File

@ -111,7 +111,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
combinedFlux = firstPageMono;
}
return new LuceneSearchResult(firstPageTopDocs.totalHits.value, combinedFlux,
return new LuceneSearchResult(LuceneUtils.convertTotalHitsCount(firstPageTopDocs.totalHits), combinedFlux,
//.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)),
releaseIndexSearcher
);

View File

@ -128,7 +128,7 @@ class UnscoredPagedLuceneShardSearcher implements LuceneShardSearcher {
);
});
return new LuceneSearchResult(result.totalHits.value, firstPageHits
return new LuceneSearchResult(LuceneUtils.convertTotalHitsCount(result.totalHits), firstPageHits
.concatWith(nextHits),
//.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)),
release

View File

@ -1,5 +1,6 @@
package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.io.IOException;
@ -159,7 +160,7 @@ public class UnscoredUnsortedContinuousLuceneMultiSearcher implements LuceneMult
false
));
return new LuceneSearchResult(1, resultsFlux, release);
return new LuceneSearchResult(TotalHitsCount.of(0, false), resultsFlux, release);
});
}
};