From a3d1207d7624e03f0939e093f770ddb79d4a8d7d Mon Sep 17 00:00:00 2001 From: Andrea Cavalli Date: Mon, 20 Jun 2022 11:55:41 +0200 Subject: [PATCH] Implement configurable merge policies --- src/main/data-generator/quic-rpc.yaml | 11 ++++ .../database/disk/LLLocalLuceneIndex.java | 2 +- .../dbengine/lucene/LuceneUtils.java | 60 ++++++++++++++++++- .../dbengine/LocalTemporaryDbGenerator.java | 3 +- .../dbengine/MemoryTemporaryDbGenerator.java | 3 +- 5 files changed, 74 insertions(+), 5 deletions(-) diff --git a/src/main/data-generator/quic-rpc.yaml b/src/main/data-generator/quic-rpc.yaml index f437750..fde4c43 100644 --- a/src/main/data-generator/quic-rpc.yaml +++ b/src/main/data-generator/quic-rpc.yaml @@ -332,6 +332,17 @@ versions: writeAllDeletes: -boolean allowNonVolatileCollection: boolean maxInMemoryResultEntries: int + mergePolicy: TieredMergePolicy + TieredMergePolicy: + data: + forceMergeDeletesPctAllowed: -double + deletesPctAllowed: -double + maxMergeAtOnce: -int + maxMergedSegmentBytes: -long + floorSegmentBytes: -long + segmentsPerTier: -double + maxCFSSegmentSizeBytes: -long + noCFSRatio: -double ByteBuffersDirectory: { data: { } } MemoryMappedFSDirectory: data: diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java index 4a92467..28d82fd 100644 --- a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java @@ -203,7 +203,7 @@ public class LLLocalLuceneIndex implements LLLuceneIndex { } logger.trace("WriterSchedulerMaxThreadCount: {}", writerSchedulerMaxThreadCount); indexWriterConfig.setMergeScheduler(mergeScheduler); - indexWriterConfig.setMergePolicy(new TieredMergePolicy()); + indexWriterConfig.setMergePolicy(LuceneUtils.getMergePolicy(luceneOptions)); if (luceneOptions.indexWriterRAMBufferSizeMB().isPresent()) { indexWriterConfig.setRAMBufferSizeMB(luceneOptions.indexWriterRAMBufferSizeMB().get()); } diff --git a/src/main/java/it/cavallium/dbengine/lucene/LuceneUtils.java b/src/main/java/it/cavallium/dbengine/lucene/LuceneUtils.java index 55c72c6..a5325ab 100644 --- a/src/main/java/it/cavallium/dbengine/lucene/LuceneUtils.java +++ b/src/main/java/it/cavallium/dbengine/lucene/LuceneUtils.java @@ -4,6 +4,9 @@ import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterrupti import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; +import it.cavallium.data.generator.nativedata.Nullabledouble; +import it.cavallium.data.generator.nativedata.Nullableint; +import it.cavallium.data.generator.nativedata.Nullablelong; import it.cavallium.dbengine.client.CompositeSnapshot; import it.cavallium.dbengine.client.query.QueryParser; import it.cavallium.dbengine.client.query.current.data.QueryParams; @@ -33,6 +36,7 @@ import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers; import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities; import it.cavallium.dbengine.rpc.current.data.LuceneDirectoryOptions; import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure; +import it.cavallium.dbengine.rpc.current.data.LuceneOptions; import it.cavallium.dbengine.rpc.current.data.MemoryMappedFSDirectory; import it.cavallium.dbengine.rpc.current.data.NIOFSDirectory; import it.cavallium.dbengine.rpc.current.data.NRTCachingDirectory; @@ -67,6 +71,8 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.misc.store.DirectIODirectory; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery.Builder; @@ -98,6 +104,7 @@ import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model; import org.novasearch.lucene.search.similarities.LdpSimilarity; import org.novasearch.lucene.search.similarities.LtcSimilarity; import org.novasearch.lucene.search.similarities.RobertsonSimilarity; +import org.rocksdb.util.SizeUnit; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; import reactor.core.scheduler.Schedulers; @@ -140,6 +147,16 @@ public class LuceneUtils { private static final PageLimits DEFAULT_PAGE_LIMITS = new ExponentialPageLimits(); private static final CharArraySet ENGLISH_AND_ITALIAN_STOP_WORDS; private static final LuceneIndexStructure SINGLE_STRUCTURE = new LuceneIndexStructure(1, IntList.of(0)); + private static final it.cavallium.dbengine.rpc.current.data.TieredMergePolicy DEFAULT_MERGE_POLICY = new it.cavallium.dbengine.rpc.current.data.TieredMergePolicy( + Nullabledouble.empty(), + Nullabledouble.empty(), + Nullableint.empty(), + Nullablelong.empty(), + Nullablelong.empty(), + Nullabledouble.empty(), + Nullablelong.empty(), + Nullabledouble.empty() + ); static { var cas = new CharArraySet( @@ -608,8 +625,8 @@ public class LuceneUtils { } else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) { var delegateDirectory = createLuceneDirectory(nrtCachingDirectory.delegate(), directoryName, rocksDBManager); return new org.apache.lucene.store.NRTCachingDirectory(delegateDirectory, - nrtCachingDirectory.maxMergeSizeBytes() / 1024D / 1024D, - nrtCachingDirectory.maxCachedBytes() / 1024D / 1024D + toMB(nrtCachingDirectory.maxMergeSizeBytes()), + toMB(nrtCachingDirectory.maxCachedBytes()) ); } else if (directoryOptions instanceof RocksDBSharedDirectory rocksDBSharedDirectory) { var dbInstance = rocksDBManager.getOrCreate(rocksDBSharedDirectory.managedPath()); @@ -687,4 +704,43 @@ public class LuceneUtils { public static LuceneIndexStructure shardsStructure(int count) { return new LuceneIndexStructure(count, intListTo(count)); } + + public static MergePolicy getMergePolicy(LuceneOptions luceneOptions) { + var mergePolicy = new TieredMergePolicy(); + var mergePolicyOptions = luceneOptions.mergePolicy(); + if (mergePolicyOptions.deletesPctAllowed().isPresent()) { + mergePolicy.setDeletesPctAllowed(mergePolicyOptions.deletesPctAllowed().get()); + } + if (mergePolicyOptions.forceMergeDeletesPctAllowed().isPresent()) { + mergePolicy.setForceMergeDeletesPctAllowed(mergePolicyOptions.forceMergeDeletesPctAllowed().get()); + } + if (mergePolicyOptions.maxMergeAtOnce().isPresent()) { + mergePolicy.setMaxMergeAtOnce(mergePolicyOptions.maxMergeAtOnce().get()); + } + if (mergePolicyOptions.maxMergedSegmentBytes().isPresent()) { + mergePolicy.setMaxMergedSegmentMB(toMB(mergePolicyOptions.maxMergedSegmentBytes().get())); + } + if (mergePolicyOptions.floorSegmentBytes().isPresent()) { + mergePolicy.setFloorSegmentMB(toMB(mergePolicyOptions.floorSegmentBytes().get())); + } + if (mergePolicyOptions.segmentsPerTier().isPresent()) { + mergePolicy.setSegmentsPerTier(mergePolicyOptions.segmentsPerTier().get()); + } + if (mergePolicyOptions.maxCFSSegmentSizeBytes().isPresent()) { + mergePolicy.setMaxCFSSegmentSizeMB(toMB(mergePolicyOptions.maxCFSSegmentSizeBytes().get())); + } + if (mergePolicyOptions.noCFSRatio().isPresent()) { + mergePolicy.setNoCFSRatio(mergePolicyOptions.noCFSRatio().get()); + } + return mergePolicy; + } + + public static double toMB(long bytes) { + if (bytes == Long.MAX_VALUE) return Double.MAX_VALUE; + return ((double) bytes) / 1024D / 1024D; + } + + public static it.cavallium.dbengine.rpc.current.data.TieredMergePolicy getDefaultMergePolicy() { + return DEFAULT_MERGE_POLICY; + } } diff --git a/src/test/java/it/cavallium/dbengine/LocalTemporaryDbGenerator.java b/src/test/java/it/cavallium/dbengine/LocalTemporaryDbGenerator.java index fe83310..6b1a2fe 100644 --- a/src/test/java/it/cavallium/dbengine/LocalTemporaryDbGenerator.java +++ b/src/test/java/it/cavallium/dbengine/LocalTemporaryDbGenerator.java @@ -53,7 +53,8 @@ public class LocalTemporaryDbGenerator implements TemporaryDbGenerator { Nullableboolean.empty(), Nullableboolean.empty(), true, - MAX_IN_MEMORY_RESULT_ENTRIES + MAX_IN_MEMORY_RESULT_ENTRIES, + LuceneUtils.getDefaultMergePolicy() ); @Override diff --git a/src/test/java/it/cavallium/dbengine/MemoryTemporaryDbGenerator.java b/src/test/java/it/cavallium/dbengine/MemoryTemporaryDbGenerator.java index f72814a..15e3490 100644 --- a/src/test/java/it/cavallium/dbengine/MemoryTemporaryDbGenerator.java +++ b/src/test/java/it/cavallium/dbengine/MemoryTemporaryDbGenerator.java @@ -42,7 +42,8 @@ public class MemoryTemporaryDbGenerator implements TemporaryDbGenerator { Nullableboolean.empty(), Nullableboolean.empty(), false, - MAX_IN_MEMORY_RESULT_ENTRIES + MAX_IN_MEMORY_RESULT_ENTRIES, + LuceneUtils.getDefaultMergePolicy() ); @Override