CavalliumDBEngine/src/test/java/it/cavallium/dbengine/TestLuceneSearches.java

383 lines
15 KiB
Java
Raw Normal View History

package it.cavallium.dbengine;
2021-12-12 23:40:30 +01:00
import static it.cavallium.dbengine.DbTestUtils.MAX_IN_MEMORY_RESULT_ENTRIES;
import static it.cavallium.dbengine.DbTestUtils.destroyAllocator;
import static it.cavallium.dbengine.DbTestUtils.ensureNoLeaks;
import static it.cavallium.dbengine.DbTestUtils.newAllocator;
import static it.cavallium.dbengine.SyncUtils.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
2021-10-15 22:03:53 +02:00
import io.net5.buffer.PooledByteBufAllocator;
import it.cavallium.dbengine.DbTestUtils.TempDb;
import it.cavallium.dbengine.DbTestUtils.TestAllocator;
2021-10-28 23:48:25 +02:00
import it.cavallium.dbengine.client.HitKey;
import it.cavallium.dbengine.client.Hits;
import it.cavallium.dbengine.client.LuceneIndex;
2021-10-28 17:18:23 +02:00
import it.cavallium.dbengine.client.Sort;
2021-10-28 23:48:25 +02:00
import it.cavallium.dbengine.client.LazyHitKey;
import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.ClientQueryParamsBuilder;
2021-10-16 01:35:09 +02:00
import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
import it.cavallium.dbengine.client.query.current.data.BoostQuery;
import it.cavallium.dbengine.client.query.current.data.MatchAllDocsQuery;
import it.cavallium.dbengine.client.query.current.data.MatchNoDocsQuery;
2021-10-16 01:35:09 +02:00
import it.cavallium.dbengine.client.query.current.data.OccurMust;
import it.cavallium.dbengine.client.query.current.data.OccurShould;
import it.cavallium.dbengine.client.query.current.data.Term;
import it.cavallium.dbengine.client.query.current.data.TermQuery;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.disk.LLTempLMDBEnv;
import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher;
import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher;
2021-11-09 00:05:26 +01:00
import it.cavallium.dbengine.lucene.searcher.CountMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
2021-10-15 22:03:53 +02:00
import it.cavallium.dbengine.lucene.searcher.OfficialSearcher;
import it.cavallium.dbengine.lucene.searcher.ScoredPagedMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.PagedLocalSearcher;
2021-10-15 22:03:53 +02:00
import it.cavallium.dbengine.lucene.searcher.SortedScoredFullMultiSearcher;
2021-12-23 02:13:51 +01:00
import it.cavallium.dbengine.lucene.searcher.SortedByScoreFullMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.UnsortedStreamingMultiSearcher;
import java.io.IOException;
2021-10-13 14:26:54 +02:00
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
2021-10-13 14:26:54 +02:00
import java.util.Map;
import java.util.Objects;
2021-10-13 14:26:54 +02:00
import java.util.stream.Collectors;
import java.util.stream.Stream;
2021-10-13 13:16:56 +02:00
import org.apache.commons.lang3.function.FailableConsumer;
2022-01-04 12:55:33 +01:00
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import reactor.core.publisher.Flux;
import reactor.core.publisher.FluxSink.OverflowStrategy;
2021-10-13 14:26:54 +02:00
import reactor.core.scheduler.Schedulers;
import reactor.util.function.Tuples;
public class TestLuceneSearches {
2022-01-04 12:55:33 +01:00
private static final Logger log = LogManager.getLogger(TestLuceneSearches.class);
private static LLTempLMDBEnv ENV;
private static final MemoryTemporaryDbGenerator TEMP_DB_GENERATOR = new MemoryTemporaryDbGenerator();
private static TestAllocator allocator;
private static TempDb tempDb;
private static LLLuceneIndex luceneSingle;
private static LLLuceneIndex luceneMulti;
private static LuceneIndex<String, String> multiIndex;
private static LuceneIndex<String, String> localIndex;
private static final Map<String, String> ELEMENTS;
2021-10-13 14:26:54 +02:00
static {
2021-10-15 22:03:53 +02:00
// Start the pool by creating and deleting a direct buffer
PooledByteBufAllocator.DEFAULT.directBuffer().release();
2021-10-13 14:26:54 +02:00
var modifiableElements = new HashMap<String, String>();
modifiableElements.put("test-key-1", "0123456789");
modifiableElements.put("test-key-2", "test 0123456789 test word");
modifiableElements.put("test-key-3", "0123456789 test example string");
modifiableElements.put("test-key-4", "hello world the quick brown fox jumps over the lazy dog");
modifiableElements.put("test-key-5", "hello the quick brown fox jumps over the lazy dog");
modifiableElements.put("test-key-6", "hello the quick brown fox jumps over the world dog");
modifiableElements.put("test-key-7", "the quick brown fox jumps over the world dog");
modifiableElements.put("test-key-8", "the quick brown fox jumps over the lazy dog");
modifiableElements.put("test-key-9", "Example1");
modifiableElements.put("test-key-10", "Example2");
modifiableElements.put("test-key-11", "Example3");
modifiableElements.put("test-key-12", "-234");
modifiableElements.put("test-key-13", "2111");
modifiableElements.put("test-key-14", "2999");
modifiableElements.put("test-key-15", "3902");
runVoid(Flux.range(1, 1000).doOnNext(i -> modifiableElements.put("test-key-" + (15 + i), "" + i)).then());
ELEMENTS = Collections.unmodifiableMap(modifiableElements);
2021-10-13 14:26:54 +02:00
}
@BeforeAll
public static void beforeAll() throws IOException {
allocator = newAllocator();
ensureNoLeaks(allocator.allocator(), false, false);
tempDb = Objects.requireNonNull(TEMP_DB_GENERATOR.openTempDb(allocator).block(), "TempDB");
luceneSingle = tempDb.luceneSingle();
luceneMulti = tempDb.luceneMulti();
ENV = new LLTempLMDBEnv();
assertEquals(0, ENV.countUsedDbs());
setUpIndex(true);
setUpIndex(false);
}
private static void setUpIndex(boolean shards) {
LuceneIndex<String, String> index = run(DbTestUtils.tempLuceneIndex(shards ? luceneSingle : luceneMulti));
2021-10-13 14:26:54 +02:00
Flux
.fromIterable(ELEMENTS.entrySet())
2021-10-13 14:26:54 +02:00
.flatMap(entry -> index.updateDocument(entry.getKey(), entry.getValue()))
.subscribeOn(Schedulers.boundedElastic())
.blockLast();
2021-11-09 00:05:26 +01:00
tempDb.swappableLuceneSearcher().setSingle(new CountMultiSearcher());
tempDb.swappableLuceneSearcher().setMulti(new CountMultiSearcher());
assertCount(index, 1000 + 15);
if (shards) {
multiIndex = index;
} else {
localIndex = index;
}
}
public static Stream<Arguments> provideArguments() {
return Stream.of(false, true).map(Arguments::of);
}
private static final Flux<Boolean> multi = Flux.just(false, true);
2021-10-28 17:18:23 +02:00
private static final Flux<Sort> multiSort = Flux.just(
Sort.score(),
2021-10-13 14:26:54 +02:00
//todo: fix random sort field
2021-10-28 17:18:23 +02:00
//Sort.randomSortField(),
Sort.no(),
Sort.doc(),
Sort.numeric("longsort", false),
Sort.numeric("longsort", true),
Sort.numeric("intsort", false),
Sort.numeric("intsort", true)
);
private static Flux<LocalSearcher> getSearchers(ExpectedQueryType info) {
return Flux.push(sink -> {
if (info.shard()) {
if (info.onlyCount()) {
2021-11-09 00:05:26 +01:00
sink.next(new CountMultiSearcher());
} else {
sink.next(new ScoredPagedMultiSearcher());
if (info.sorted() && !info.sortedByScore()) {
sink.next(new SortedScoredFullMultiSearcher(ENV));
} else {
2021-12-23 02:13:51 +01:00
sink.next(new SortedByScoreFullMultiSearcher(ENV));
}
if (!info.sorted()) {
sink.next(new UnsortedUnscoredSimpleMultiSearcher(new PagedLocalSearcher()));
2021-12-23 02:13:51 +01:00
sink.next(new UnsortedStreamingMultiSearcher());
}
}
2021-12-12 23:40:30 +01:00
sink.next(new AdaptiveMultiSearcher(ENV, true, MAX_IN_MEMORY_RESULT_ENTRIES));
} else {
if (info.onlyCount()) {
2021-11-09 00:05:26 +01:00
sink.next(new CountMultiSearcher());
} else {
sink.next(new PagedLocalSearcher());
}
2021-12-12 23:40:30 +01:00
sink.next(new AdaptiveLocalSearcher(ENV, true, MAX_IN_MEMORY_RESULT_ENTRIES));
}
sink.complete();
}, OverflowStrategy.BUFFER);
}
public static Stream<Arguments> provideQueryArgumentsScoreMode() {
return multi.map(tuple -> Arguments.of(multi)).toStream();
}
public static Stream<Arguments> provideQueryArgumentsScoreModeAndSort() {
return multi
.concatMap(multi -> multiSort.map(multiSort -> Tuples.of(multi, multiSort)))
.map(tuple -> Arguments.of(tuple.toArray()))
.toStream();
}
2021-10-15 22:03:53 +02:00
private static void runSearchers(ExpectedQueryType expectedQueryType, FailableConsumer<LocalSearcher, Throwable> consumer)
throws Throwable {
var searchers = run(getSearchers(expectedQueryType).collectList());
for (LocalSearcher searcher : searchers) {
log.info("Using searcher \"{}\"", searcher.getName());
2021-10-15 22:03:53 +02:00
consumer.accept(searcher);
}
2021-10-13 13:16:56 +02:00
}
@BeforeEach
public void beforeEach() {
assertEquals(0, ENV.countUsedDbs());
}
@AfterEach
public void afterEach() {
assertEquals(0, ENV.countUsedDbs());
}
@AfterAll
2021-10-16 14:59:38 +02:00
public static void afterAll() throws IOException {
TEMP_DB_GENERATOR.closeTempDb(tempDb).block();
assertEquals(0, ENV.countUsedDbs());
2021-10-16 14:59:38 +02:00
ENV.close();
ensureNoLeaks(allocator.allocator(), true, false);
destroyAllocator(allocator);
}
private LuceneIndex<String, String> getLuceneIndex(boolean shards, @Nullable LocalSearcher customSearcher) {
if (customSearcher != null) {
tempDb.swappableLuceneSearcher().setSingle(customSearcher);
if (shards) {
if (customSearcher instanceof MultiSearcher multiSearcher) {
tempDb.swappableLuceneSearcher().setMulti(multiSearcher);
} else {
throw new IllegalArgumentException("Expected a LuceneMultiSearcher, got a LuceneLocalSearcher: " + customSearcher.getName());
}
}
} else {
2021-12-12 23:40:30 +01:00
tempDb.swappableLuceneSearcher().setSingle(new AdaptiveLocalSearcher(ENV, true, MAX_IN_MEMORY_RESULT_ENTRIES));
tempDb.swappableLuceneSearcher().setMulti(new AdaptiveMultiSearcher(ENV, true, MAX_IN_MEMORY_RESULT_ENTRIES));
}
return shards ? multiIndex : localIndex;
}
private static void assertCount(LuceneIndex<String, String> luceneIndex, long expected) {
Assertions.assertEquals(expected, getCount(luceneIndex));
}
private static long getCount(LuceneIndex<String, String> luceneIndex) {
luceneIndex.refresh(true).block();
var totalHitsCount = run(luceneIndex.count(null, new MatchAllDocsQuery()));
Assertions.assertTrue(totalHitsCount.exact(), "Can't get count because the total hits count is not exact");
return totalHitsCount.value();
}
2021-10-13 14:26:54 +02:00
private boolean supportsPreciseHitsCount(LocalSearcher searcher,
2021-10-28 23:48:25 +02:00
ClientQueryParams query) {
var sorted = query.isSorted();
2021-12-23 02:13:51 +01:00
if (searcher instanceof UnsortedStreamingMultiSearcher) {
2021-10-13 14:26:54 +02:00
return false;
} else if (!sorted) {
return !(searcher instanceof AdaptiveMultiSearcher) && !(searcher instanceof AdaptiveLocalSearcher);
} else {
return true;
2021-10-13 14:26:54 +02:00
}
}
2021-10-28 23:48:25 +02:00
public void testSearch(ClientQueryParamsBuilder queryParamsBuilder,
2021-10-16 00:32:04 +02:00
ExpectedQueryType expectedQueryType) throws Throwable {
2021-10-16 00:32:04 +02:00
runSearchers(expectedQueryType, searcher -> {
var luceneIndex = getLuceneIndex(expectedQueryType.shard(), searcher);
var query = queryParamsBuilder.build();
2021-11-08 11:17:52 +01:00
try (var results = run(luceneIndex.search(query))) {
var hits = results.totalHitsCount();
var keys = getResults(results);
2021-10-16 00:32:04 +02:00
if (hits.exact()) {
Assertions.assertEquals(keys.size(), hits.value());
} else {
Assertions.assertTrue(keys.size() >= hits.value());
}
2021-10-15 22:03:53 +02:00
2021-12-12 16:41:49 +01:00
var officialSearcher = new OfficialSearcher();
2021-10-16 00:32:04 +02:00
luceneIndex = getLuceneIndex(expectedQueryType.shard(), officialSearcher);
var officialQuery = queryParamsBuilder.limit(ELEMENTS.size() * 2L).build();
2021-11-08 11:17:52 +01:00
try (var officialResults = run(luceneIndex.search(officialQuery))) {
2021-10-16 00:32:04 +02:00
var officialHits = officialResults.totalHitsCount();
2021-10-15 22:03:53 +02:00
var officialKeys = getResults(officialResults).stream().toList();
2021-10-16 00:32:04 +02:00
if (officialHits.exact()) {
Assertions.assertEquals(officialKeys.size(), officialHits.value());
} else {
Assertions.assertTrue(officialKeys.size() >= officialHits.value());
}
2021-10-15 22:03:53 +02:00
2021-10-16 00:32:04 +02:00
if (hits.exact() && officialHits.exact()) {
assertExactHits(officialHits.value(), hits);
}
2021-10-15 22:03:53 +02:00
2021-10-16 00:32:04 +02:00
Assertions.assertEquals(officialKeys.size(), keys.size());
2021-10-16 00:50:06 +02:00
2021-10-16 00:32:04 +02:00
assertResults(officialKeys, keys, expectedQueryType.sorted(), expectedQueryType.sortedByScore());
}
}
2021-10-13 13:16:56 +02:00
});
}
2021-10-16 00:32:04 +02:00
@ParameterizedTest
@MethodSource("provideQueryArgumentsScoreModeAndSort")
2021-10-28 17:18:23 +02:00
public void testSearchNoDocs(boolean shards, Sort multiSort) throws Throwable {
2021-10-16 00:50:06 +02:00
var queryBuilder = ClientQueryParams
2021-10-28 23:48:25 +02:00
.<LazyHitKey<String>>builder()
2021-10-16 00:50:06 +02:00
.query(new MatchNoDocsQuery())
.snapshot(null)
2021-11-16 23:54:23 +01:00
.computePreciseHitsCount(true)
2021-10-16 00:50:06 +02:00
.sort(multiSort);
2021-10-16 00:32:04 +02:00
ExpectedQueryType expectedQueryType = new ExpectedQueryType(shards, multiSort, true, false);
testSearch(queryBuilder, expectedQueryType);
}
@ParameterizedTest
@MethodSource("provideQueryArgumentsScoreModeAndSort")
2021-10-28 17:18:23 +02:00
public void testSearchAllDocs(boolean shards, Sort multiSort) throws Throwable {
2021-10-16 00:50:06 +02:00
var queryBuilder = ClientQueryParams
2021-10-28 23:48:25 +02:00
.<LazyHitKey<String>>builder()
2021-10-16 00:50:06 +02:00
.query(new MatchAllDocsQuery())
.snapshot(null)
2021-11-16 23:54:23 +01:00
.computePreciseHitsCount(true)
2021-10-16 00:50:06 +02:00
.sort(multiSort);
2021-10-16 00:32:04 +02:00
ExpectedQueryType expectedQueryType = new ExpectedQueryType(shards, multiSort, true, false);
testSearch(queryBuilder, expectedQueryType);
}
2021-10-16 01:35:09 +02:00
@ParameterizedTest
@MethodSource("provideQueryArgumentsScoreModeAndSort")
2021-10-28 17:18:23 +02:00
public void testSearchAdvancedText(boolean shards, Sort multiSort) throws Throwable {
2021-10-16 01:35:09 +02:00
var queryBuilder = ClientQueryParams
2021-10-28 23:48:25 +02:00
.<LazyHitKey<String>>builder()
2021-10-16 01:35:09 +02:00
.query(new BooleanQuery(List.of(
new BooleanQueryPart(new BoostQuery(new TermQuery(new Term("text", "hello")), 3), new OccurShould()),
new BooleanQueryPart(new TermQuery(new Term("text", "world")), new OccurShould()),
new BooleanQueryPart(new BoostQuery(new TermQuery(new Term("text", "hello")), 2), new OccurShould()),
new BooleanQueryPart(new BoostQuery(new TermQuery(new Term("text", "hello")), 100), new OccurShould()),
new BooleanQueryPart(new TermQuery(new Term("text", "hello")), new OccurMust())
), 1))
.snapshot(null)
2021-11-16 23:54:23 +01:00
.computePreciseHitsCount(true)
2021-10-16 01:35:09 +02:00
.sort(multiSort);
2021-10-14 15:55:58 +02:00
2021-10-16 01:35:09 +02:00
ExpectedQueryType expectedQueryType = new ExpectedQueryType(shards, multiSort, true, false);
testSearch(queryBuilder, expectedQueryType);
}
2021-10-14 15:55:58 +02:00
2021-10-16 01:35:09 +02:00
private void assertResults(List<Scored> expectedKeys, List<Scored> resultKeys, boolean sorted, boolean sortedByScore) {
2021-10-15 22:03:53 +02:00
if (sortedByScore) {
Assertions.assertEquals(expectedKeys, resultKeys);
} else if (sorted) {
2021-10-13 14:26:54 +02:00
var results = resultKeys.stream().map(Scored::key).toList();
2021-10-15 22:03:53 +02:00
Assertions.assertEquals(expectedKeys.stream().map(Scored::key).toList(), results);
} else {
var results = resultKeys.stream().map(Scored::key).collect(Collectors.toSet());
Assertions.assertEquals(new HashSet<>(expectedKeys.stream().map(Scored::key).toList()), results);
2021-10-13 14:26:54 +02:00
}
}
private void assertHitsIfPossible(long expectedCount, TotalHitsCount hits) {
if (hits.exact()) {
assertEquals(new TotalHitsCount(expectedCount, true), hits);
}
}
2021-10-16 00:32:04 +02:00
private void assertExactHits(long expectedCount, TotalHitsCount hits) {
assertEquals(new TotalHitsCount(expectedCount, true), hits);
2021-10-13 14:26:54 +02:00
}
2021-10-28 23:48:25 +02:00
private List<Scored> getResults(Hits<HitKey<String>> results) {
return run(results
.results()
2021-10-28 23:48:25 +02:00
.map(key -> new Scored(key.key(), key.score()))
.collectList());
}
}