2023-02-22 22:31:36 +01:00
|
|
|
package it.cavallium.dbengine.tests;
|
2021-10-13 00:23:56 +02:00
|
|
|
|
2023-02-22 22:31:36 +01:00
|
|
|
import static it.cavallium.dbengine.tests.DbTestUtils.MAX_IN_MEMORY_RESULT_ENTRIES;
|
|
|
|
import static it.cavallium.dbengine.tests.DbTestUtils.ensureNoLeaks;
|
2021-10-13 00:23:56 +02:00
|
|
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
|
|
import static org.junit.jupiter.api.Assertions.fail;
|
|
|
|
|
2023-02-22 22:31:36 +01:00
|
|
|
import it.cavallium.dbengine.tests.DbTestUtils.TempDb;
|
2023-03-06 12:19:08 +01:00
|
|
|
import it.cavallium.buffer.Buf;
|
2021-10-13 00:23:56 +02:00
|
|
|
import it.cavallium.dbengine.client.LuceneIndex;
|
2021-10-28 17:18:23 +02:00
|
|
|
import it.cavallium.dbengine.client.Sort;
|
2021-10-13 00:23:56 +02:00
|
|
|
import it.cavallium.dbengine.client.query.current.data.MatchAllDocsQuery;
|
|
|
|
import it.cavallium.dbengine.database.LLLuceneIndex;
|
|
|
|
import it.cavallium.dbengine.database.LLScoreMode;
|
2021-10-13 12:25:32 +02:00
|
|
|
import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher;
|
|
|
|
import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher;
|
2021-11-09 00:05:26 +01:00
|
|
|
import it.cavallium.dbengine.lucene.searcher.CountMultiSearcher;
|
2021-10-13 12:25:32 +02:00
|
|
|
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
|
|
|
|
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
|
2021-10-13 00:23:56 +02:00
|
|
|
import java.io.IOException;
|
2023-02-22 22:31:36 +01:00
|
|
|
import java.util.List;
|
2021-10-13 00:23:56 +02:00
|
|
|
import java.util.Objects;
|
2023-02-22 22:31:36 +01:00
|
|
|
import java.util.stream.IntStream;
|
2021-10-13 00:23:56 +02:00
|
|
|
import java.util.stream.Stream;
|
2022-01-04 12:55:33 +01:00
|
|
|
import org.apache.logging.log4j.LogManager;
|
|
|
|
import org.apache.logging.log4j.Logger;
|
2021-10-13 00:23:56 +02:00
|
|
|
import org.jetbrains.annotations.Nullable;
|
2021-10-16 14:35:04 +02:00
|
|
|
import org.junit.jupiter.api.AfterAll;
|
2021-10-13 00:23:56 +02:00
|
|
|
import org.junit.jupiter.api.AfterEach;
|
|
|
|
import org.junit.jupiter.api.Assertions;
|
2021-10-16 14:35:04 +02:00
|
|
|
import org.junit.jupiter.api.BeforeAll;
|
2021-10-13 00:23:56 +02:00
|
|
|
import org.junit.jupiter.api.BeforeEach;
|
|
|
|
import org.junit.jupiter.api.Test;
|
|
|
|
import org.junit.jupiter.params.ParameterizedTest;
|
|
|
|
import org.junit.jupiter.params.provider.Arguments;
|
|
|
|
import org.junit.jupiter.params.provider.MethodSource;
|
|
|
|
|
|
|
|
public class TestLuceneIndex {
|
|
|
|
|
2022-01-04 12:55:33 +01:00
|
|
|
private final Logger log = LogManager.getLogger(this.getClass());
|
2021-10-13 00:23:56 +02:00
|
|
|
private TempDb tempDb;
|
|
|
|
private LLLuceneIndex luceneSingle;
|
|
|
|
private LLLuceneIndex luceneMulti;
|
|
|
|
|
|
|
|
protected TemporaryDbGenerator getTempDbGenerator() {
|
|
|
|
return new MemoryTemporaryDbGenerator();
|
|
|
|
}
|
|
|
|
|
2021-10-16 14:35:04 +02:00
|
|
|
@BeforeAll
|
|
|
|
public static void beforeAll() throws IOException {
|
|
|
|
}
|
|
|
|
|
2021-10-13 00:23:56 +02:00
|
|
|
@BeforeEach
|
2023-02-22 22:31:36 +01:00
|
|
|
public void beforeEach() throws IOException {
|
2023-02-28 23:10:31 +01:00
|
|
|
ensureNoLeaks();
|
2023-02-22 22:31:36 +01:00
|
|
|
tempDb = Objects.requireNonNull(getTempDbGenerator().openTempDb(), "TempDB");
|
2021-10-13 00:23:56 +02:00
|
|
|
luceneSingle = tempDb.luceneSingle();
|
|
|
|
luceneMulti = tempDb.luceneMulti();
|
|
|
|
}
|
|
|
|
|
|
|
|
public static Stream<Arguments> provideArguments() {
|
|
|
|
return Stream.of(false, true).map(Arguments::of);
|
|
|
|
}
|
|
|
|
|
2023-02-22 22:31:36 +01:00
|
|
|
private static final List<Boolean> multi = List.of(false, true);
|
|
|
|
private static final List<LLScoreMode> scoreModes = List.of(LLScoreMode.NO_SCORES,
|
2021-10-13 00:23:56 +02:00
|
|
|
LLScoreMode.TOP_SCORES,
|
|
|
|
LLScoreMode.COMPLETE_NO_SCORES,
|
|
|
|
LLScoreMode.COMPLETE
|
|
|
|
);
|
2023-02-22 22:31:36 +01:00
|
|
|
private static final List<Sort> multiSort = List.of(Sort.score(),
|
2021-10-28 17:18:23 +02:00
|
|
|
Sort.random(),
|
|
|
|
Sort.no(),
|
|
|
|
Sort.doc(),
|
|
|
|
Sort.numeric("longsort", false),
|
|
|
|
Sort.numeric("longsort", true)
|
2021-10-13 00:23:56 +02:00
|
|
|
);
|
|
|
|
|
2023-02-22 22:31:36 +01:00
|
|
|
/**
 * Immutable pair of values.
 *
 * @param getT1 first value
 * @param getT2 second value
 * @param <X> type of the first value
 * @param <Y> type of the second value
 */
record Tuple2<X, Y>(X getT1, Y getT2) {

    /**
     * Copies the components into a new array, in declaration order.
     *
     * @return a fresh two-element array holding {@code getT1} then {@code getT2}
     */
    public Object[] toArray() {
        final Object[] components = new Object[2];
        components[0] = getT1();
        components[1] = getT2();
        return components;
    }
}
|
2023-02-22 23:31:05 +01:00
|
|
|
/**
 * Immutable triple of values.
 *
 * @param getT1 first value
 * @param getT2 second value
 * @param getT3 third value
 * @param <X> type of the first value
 * @param <Y> type of the second value
 * @param <Z> type of the third value
 */
record Tuple3<X, Y, Z>(X getT1, Y getT2, Z getT3) {

    /**
     * Copies the components into a new array, in declaration order.
     *
     * @return a fresh three-element array holding {@code getT1}, {@code getT2}, {@code getT3}
     */
    public Object[] toArray() {
        final Object[] components = new Object[3];
        components[0] = getT1();
        components[1] = getT2();
        components[2] = getT3();
        return components;
    }
}
|
2023-02-22 23:31:05 +01:00
|
|
|
/**
 * Immutable group of four values.
 *
 * @param getT1 first value
 * @param getT2 second value
 * @param getT3 third value
 * @param getT4 fourth value
 * @param <X> type of the first value
 * @param <Y> type of the second value
 * @param <Z> type of the third value
 * @param <W> type of the fourth value
 */
record Tuple4<X, Y, Z, W>(X getT1, Y getT2, Z getT3, W getT4) {

    /**
     * Copies the components into a new array, in declaration order.
     *
     * @return a fresh four-element array holding {@code getT1} through {@code getT4}
     */
    public Object[] toArray() {
        final Object[] components = new Object[4];
        components[0] = getT1();
        components[1] = getT2();
        components[2] = getT3();
        components[3] = getT4();
        return components;
    }
}
|
2023-02-22 23:31:05 +01:00
|
|
|
/**
 * Immutable group of five values.
 *
 * @param getT1 first value
 * @param getT2 second value
 * @param getT3 third value
 * @param getT4 fourth value
 * @param getT5 fifth value
 * @param <X> type of the first value
 * @param <Y> type of the second value
 * @param <Z> type of the third value
 * @param <W> type of the fourth value
 * @param <X1> type of the fifth value
 */
record Tuple5<X, Y, Z, W, X1>(X getT1, Y getT2, Z getT3, W getT4, X1 getT5) {

    /**
     * Copies the components into a new array, in declaration order.
     *
     * @return a fresh five-element array holding {@code getT1} through {@code getT5}
     */
    public Object[] toArray() {
        final Object[] components = new Object[5];
        components[0] = getT1();
        components[1] = getT2();
        components[2] = getT3();
        components[3] = getT4();
        components[4] = getT5();
        return components;
    }
}
|
|
|
|
|
2021-10-13 00:23:56 +02:00
|
|
|
public static Stream<Arguments> provideQueryArgumentsScoreMode() {
|
2023-02-22 22:31:36 +01:00
|
|
|
return multi.stream()
|
|
|
|
.flatMap(shard -> scoreModes.stream().map(scoreMode -> new Tuple2<>(shard, scoreMode)))
|
|
|
|
.map(tuple -> Arguments.of(tuple.toArray()));
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
public static Stream<Arguments> provideQueryArgumentsSort() {
|
2023-02-22 22:31:36 +01:00
|
|
|
return multi.stream()
|
|
|
|
.flatMap(shard -> multiSort.stream().map(multiSort -> new Tuple2<>(shard, multiSort)))
|
|
|
|
.map(tuple -> Arguments.of(tuple.toArray()));
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
public static Stream<Arguments> provideQueryArgumentsScoreModeAndSort() {
|
2023-02-22 22:31:36 +01:00
|
|
|
return multi.stream()
|
|
|
|
.flatMap(shard -> scoreModes.stream().map(scoreMode -> new Tuple2<>(shard, scoreMode)))
|
|
|
|
.flatMap(tuple -> multiSort.stream().map(multiSort -> new Tuple3<>(tuple.getT1(), tuple.getT2(), multiSort)))
|
|
|
|
.map(tuple -> Arguments.of(tuple.toArray()));
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@AfterEach
|
2023-02-22 22:31:36 +01:00
|
|
|
public void afterEach() throws IOException {
|
|
|
|
getTempDbGenerator().closeTempDb(tempDb);
|
2023-02-28 23:10:31 +01:00
|
|
|
ensureNoLeaks();
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
2021-10-16 14:35:04 +02:00
|
|
|
@AfterAll
|
2021-10-16 14:59:38 +02:00
|
|
|
public static void afterAll() throws IOException {
|
2021-10-16 14:35:04 +02:00
|
|
|
}
|
|
|
|
|
2021-10-13 12:25:32 +02:00
|
|
|
private LuceneIndex<String, String> getLuceneIndex(boolean shards, @Nullable LocalSearcher customSearcher) {
|
2023-02-22 22:31:36 +01:00
|
|
|
LuceneIndex<String, String> index = DbTestUtils.tempLuceneIndex(shards ? luceneSingle : luceneMulti);
|
|
|
|
index.updateDocument("test-key-1", "0123456789");
|
|
|
|
index.updateDocument("test-key-2", "test 0123456789 test word");
|
|
|
|
index.updateDocument("test-key-3", "0123456789 test example string");
|
|
|
|
index.updateDocument("test-key-4", "hello world the quick brown fox jumps over the lazy dog");
|
|
|
|
index.updateDocument("test-key-5", "hello the quick brown fox jumps over the lazy dog");
|
|
|
|
index.updateDocument("test-key-6", "hello the quick brown fox jumps over the world dog");
|
|
|
|
index.updateDocument("test-key-7", "the quick brown fox jumps over the world dog");
|
|
|
|
index.updateDocument("test-key-8", "the quick brown fox jumps over the lazy dog");
|
|
|
|
index.updateDocument("test-key-9", "Example1");
|
|
|
|
index.updateDocument("test-key-10", "Example2");
|
|
|
|
index.updateDocument("test-key-11", "Example3");
|
|
|
|
index.updateDocument("test-key-12", "-234");
|
|
|
|
index.updateDocument("test-key-13", "2111");
|
|
|
|
index.updateDocument("test-key-14", "2999");
|
|
|
|
index.updateDocument("test-key-15", "3902");
|
|
|
|
IntStream.rangeClosed(1, 1000).forEach(i -> index.updateDocument("test-key-" + (15 + i), "" + i));
|
2021-11-09 00:05:26 +01:00
|
|
|
tempDb.swappableLuceneSearcher().setSingle(new CountMultiSearcher());
|
|
|
|
tempDb.swappableLuceneSearcher().setMulti(new CountMultiSearcher());
|
2021-10-13 00:23:56 +02:00
|
|
|
assertCount(index, 1000 + 15);
|
2021-10-16 14:35:04 +02:00
|
|
|
if (customSearcher != null) {
|
|
|
|
tempDb.swappableLuceneSearcher().setSingle(customSearcher);
|
|
|
|
if (shards) {
|
|
|
|
if (customSearcher instanceof MultiSearcher multiSearcher) {
|
|
|
|
tempDb.swappableLuceneSearcher().setMulti(multiSearcher);
|
|
|
|
} else {
|
|
|
|
throw new IllegalArgumentException("Expected a LuceneMultiSearcher, got a LuceneLocalSearcher: " + customSearcher.getName());
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
}
|
2021-10-16 14:35:04 +02:00
|
|
|
} else {
|
2023-02-22 22:31:36 +01:00
|
|
|
tempDb.swappableLuceneSearcher().setSingle(new AdaptiveLocalSearcher(MAX_IN_MEMORY_RESULT_ENTRIES));
|
|
|
|
tempDb.swappableLuceneSearcher().setMulti(new AdaptiveMultiSearcher(MAX_IN_MEMORY_RESULT_ENTRIES));
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
return index;
|
|
|
|
}
|
|
|
|
|
|
|
|
private void assertCount(LuceneIndex<String, String> luceneIndex, long expected) {
|
|
|
|
Assertions.assertEquals(expected, getCount(luceneIndex));
|
|
|
|
}
|
|
|
|
|
|
|
|
private long getCount(LuceneIndex<String, String> luceneIndex) {
|
2023-02-22 22:31:36 +01:00
|
|
|
luceneIndex.refresh(true);
|
|
|
|
var totalHitsCount = luceneIndex.count(null, new MatchAllDocsQuery());
|
2021-10-13 00:23:56 +02:00
|
|
|
Assertions.assertTrue(totalHitsCount.exact(), "Can't get count because the total hits count is not exact");
|
|
|
|
return totalHitsCount.value();
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testNoOp() {
|
|
|
|
}
|
|
|
|
|
|
|
|
@Test
|
|
|
|
public void testNoOpAllocation() {
|
|
|
|
for (int i = 0; i < 10; i++) {
|
2023-02-22 22:31:36 +01:00
|
|
|
var a = Buf.create(i * 512);
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@ParameterizedTest
|
|
|
|
@MethodSource("provideArguments")
|
|
|
|
public void testGetLuceneIndex(boolean shards) {
|
2023-02-22 23:31:05 +01:00
|
|
|
try (var luceneIndex = getLuceneIndex(shards, null)) {
|
|
|
|
Assertions.assertNotNull(luceneIndex);
|
|
|
|
}
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@ParameterizedTest
|
|
|
|
@MethodSource("provideArguments")
|
|
|
|
public void testDeleteAll(boolean shards) {
|
2023-02-22 23:31:05 +01:00
|
|
|
try (var luceneIndex = getLuceneIndex(shards, null)) {
|
|
|
|
luceneIndex.deleteAll();
|
|
|
|
assertCount(luceneIndex, 0);
|
|
|
|
}
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@ParameterizedTest
|
|
|
|
@MethodSource("provideArguments")
|
|
|
|
public void testDelete(boolean shards) {
|
2023-02-22 23:31:05 +01:00
|
|
|
try (var luceneIndex = getLuceneIndex(shards, null)) {
|
|
|
|
var prevCount = getCount(luceneIndex);
|
|
|
|
luceneIndex.deleteDocument("test-key-1");
|
|
|
|
assertCount(luceneIndex, prevCount - 1);
|
|
|
|
}
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@ParameterizedTest
|
|
|
|
@MethodSource("provideArguments")
|
|
|
|
public void testUpdateSameDoc(boolean shards) {
|
2023-02-22 23:31:05 +01:00
|
|
|
try (var luceneIndex = getLuceneIndex(shards, null)) {
|
|
|
|
var prevCount = getCount(luceneIndex);
|
|
|
|
luceneIndex.updateDocument("test-key-1", "new-value");
|
|
|
|
assertCount(luceneIndex, prevCount);
|
|
|
|
}
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@ParameterizedTest
|
|
|
|
@MethodSource("provideArguments")
|
|
|
|
public void testUpdateNewDoc(boolean shards) {
|
2023-02-22 23:31:05 +01:00
|
|
|
try (var luceneIndex = getLuceneIndex(shards, null)) {
|
|
|
|
var prevCount = getCount(luceneIndex);
|
|
|
|
luceneIndex.updateDocument("test-key-new", "new-value");
|
|
|
|
assertCount(luceneIndex, prevCount + 1);
|
|
|
|
}
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|