2020-12-07 22:15:18 +01:00
|
|
|
package it.cavallium.dbengine.database.disk;
|
|
|
|
|
2021-05-03 21:41:51 +02:00
|
|
|
import io.netty.buffer.ByteBufAllocator;
|
2021-01-30 00:24:55 +01:00
|
|
|
import it.cavallium.dbengine.database.Column;
|
2021-07-01 21:19:52 +02:00
|
|
|
import it.cavallium.dbengine.client.DatabaseOptions;
|
2021-01-30 00:24:55 +01:00
|
|
|
import it.cavallium.dbengine.database.LLKeyValueDatabase;
|
|
|
|
import it.cavallium.dbengine.database.LLSnapshot;
|
2021-02-13 01:31:24 +01:00
|
|
|
import it.cavallium.dbengine.database.UpdateMode;
|
2020-12-07 22:15:18 +01:00
|
|
|
import java.io.File;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
import java.nio.file.Files;
|
|
|
|
import java.nio.file.Path;
|
|
|
|
import java.nio.file.Paths;
|
|
|
|
import java.nio.file.attribute.BasicFileAttributes;
|
|
|
|
import java.time.Duration;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.HashSet;
|
|
|
|
import java.util.LinkedList;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
2021-03-21 13:06:54 +01:00
|
|
|
import java.util.concurrent.ThreadLocalRandom;
|
|
|
|
import java.util.concurrent.TimeUnit;
|
2020-12-07 22:15:18 +01:00
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
2021-03-21 13:06:54 +01:00
|
|
|
import org.apache.commons.lang3.time.StopWatch;
|
2021-07-10 20:52:01 +02:00
|
|
|
import org.jetbrains.annotations.Nullable;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.rocksdb.BlockBasedTableConfig;
|
|
|
|
import org.rocksdb.BloomFilter;
|
2021-07-17 11:52:08 +02:00
|
|
|
import org.rocksdb.ClockCache;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.rocksdb.ColumnFamilyDescriptor;
|
|
|
|
import org.rocksdb.ColumnFamilyHandle;
|
2021-03-19 20:55:38 +01:00
|
|
|
import org.rocksdb.CompactRangeOptions;
|
2021-05-04 01:21:29 +02:00
|
|
|
import org.rocksdb.CompactionPriority;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.rocksdb.CompactionStyle;
|
|
|
|
import org.rocksdb.CompressionType;
|
|
|
|
import org.rocksdb.DBOptions;
|
|
|
|
import org.rocksdb.DbPath;
|
|
|
|
import org.rocksdb.FlushOptions;
|
2021-07-17 11:52:08 +02:00
|
|
|
import org.rocksdb.IndexType;
|
2021-03-20 12:41:11 +01:00
|
|
|
import org.rocksdb.LRUCache;
|
2021-07-06 22:27:03 +02:00
|
|
|
import org.rocksdb.MemoryUtil;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.rocksdb.Options;
|
2021-05-11 21:59:05 +02:00
|
|
|
import org.rocksdb.RateLimiter;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.rocksdb.RocksDB;
|
|
|
|
import org.rocksdb.RocksDBException;
|
|
|
|
import org.rocksdb.Snapshot;
|
|
|
|
import org.rocksdb.WALRecoveryMode;
|
2021-03-20 12:41:11 +01:00
|
|
|
import org.rocksdb.WriteBufferManager;
|
2021-03-19 20:55:38 +01:00
|
|
|
import org.warp.commonutils.log.Logger;
|
|
|
|
import org.warp.commonutils.log.LoggerFactory;
|
2021-01-30 01:42:37 +01:00
|
|
|
import reactor.core.publisher.Mono;
|
2021-02-01 02:21:53 +01:00
|
|
|
import reactor.core.scheduler.Scheduler;
|
2021-01-30 01:42:37 +01:00
|
|
|
import reactor.core.scheduler.Schedulers;
|
2020-12-07 22:15:18 +01:00
|
|
|
|
|
|
|
public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
|
|
|
|
|
|
|
static {
	// Make sure the native RocksDB library is loaded before this class touches any RocksDB type
	RocksDB.loadLibrary();
}

/** Logger shared by every instance of this class. */
protected static final Logger logger = LoggerFactory.getLogger(LLLocalKeyValueDatabase.class);

/** Descriptor of RocksDB's default column family; used to recognise it by name. */
private static final ColumnFamilyDescriptor DEFAULT_COLUMN_FAMILY
		= new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY);

/** Buffer allocator received by the constructor and exposed through {@code getAllocator()}. */
private final ByteBufAllocator allocator;

/** Scheduler (named {@code "db-" + name}) on which the Monos returned by this class are subscribed. */
private final Scheduler dbScheduler;

// Configurations

/** Absolute path of the database directory, computed by the constructor. */
private final Path dbPath;

/** Database name, as passed to the constructor. */
private final String name;

/** Options this database was opened with. */
private final DatabaseOptions databaseOptions;

/** {@code true} when the {@code enableColumnBug} extra flag of the database options equals {@code "true"}. */
private final boolean enableColumnsBug;

/** The RocksDB instance; not final because it is assigned while the constructor runs. */
private RocksDB db;

/** Column family handle of each known column, filled in by the constructor. */
private final Map<Column, ColumnFamilyHandle> handles;

/** RocksDB snapshots that are currently open, keyed by the number issued in {@code takeSnapshot()}. */
private final ConcurrentHashMap<Long, Snapshot> snapshotsHandles = new ConcurrentHashMap<>();

/** Source of the numbers used as keys of {@link #snapshotsHandles}; the first number is 1. */
private final AtomicLong nextSnapshotNumbers = new AtomicLong(1);
|
|
|
|
|
2021-06-27 15:40:56 +02:00
|
|
|
@SuppressWarnings("SwitchStatementWithTooFewBranches")
|
2021-05-03 21:41:51 +02:00
|
|
|
public LLLocalKeyValueDatabase(ByteBufAllocator allocator,
|
|
|
|
String name,
|
2021-07-10 20:52:01 +02:00
|
|
|
@Nullable Path path,
|
2021-05-03 21:41:51 +02:00
|
|
|
List<Column> columns,
|
|
|
|
List<ColumnFamilyHandle> handles,
|
2021-06-27 15:40:56 +02:00
|
|
|
DatabaseOptions databaseOptions) throws IOException {
|
|
|
|
this.name = name;
|
2021-05-03 21:41:51 +02:00
|
|
|
this.allocator = allocator;
|
2021-06-27 15:40:56 +02:00
|
|
|
Options rocksdbOptions = openRocksDb(path, databaseOptions);
|
2020-12-07 22:15:18 +01:00
|
|
|
try {
|
|
|
|
List<ColumnFamilyDescriptor> descriptors = new LinkedList<>();
|
2021-01-31 12:43:28 +01:00
|
|
|
descriptors
|
|
|
|
.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY));
|
2020-12-07 22:15:18 +01:00
|
|
|
for (Column column : columns) {
|
|
|
|
descriptors
|
2021-06-19 16:26:54 +02:00
|
|
|
.add(new ColumnFamilyDescriptor(column.name().getBytes(StandardCharsets.US_ASCII)));
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Get databases directory path
|
|
|
|
Path databasesDirPath = path.toAbsolutePath().getParent();
|
|
|
|
String dbPathString = databasesDirPath.toString() + File.separatorChar + path.getFileName();
|
|
|
|
Path dbPath = Paths.get(dbPathString);
|
|
|
|
this.dbPath = dbPath;
|
2021-06-27 15:40:56 +02:00
|
|
|
|
|
|
|
// Set options
|
|
|
|
this.databaseOptions = databaseOptions;
|
|
|
|
|
|
|
|
int threadCap;
|
|
|
|
if (databaseOptions.lowMemory()) {
|
|
|
|
threadCap = Runtime.getRuntime().availableProcessors();
|
2021-06-27 16:33:23 +02:00
|
|
|
} else {
|
|
|
|
// 8 or more
|
|
|
|
threadCap = Math.max(8, Runtime.getRuntime().availableProcessors());
|
2021-06-27 15:40:56 +02:00
|
|
|
}
|
|
|
|
this.dbScheduler = Schedulers.newBoundedElastic(threadCap,
|
2021-04-15 00:00:42 +02:00
|
|
|
Schedulers.DEFAULT_BOUNDED_ELASTIC_QUEUESIZE,
|
|
|
|
"db-" + name,
|
|
|
|
60,
|
|
|
|
true
|
|
|
|
);
|
2021-06-27 15:40:56 +02:00
|
|
|
this.enableColumnsBug = "true".equals(databaseOptions.extraFlags().getOrDefault("enableColumnBug", "false"));
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-06-27 15:40:56 +02:00
|
|
|
createIfNotExists(descriptors, rocksdbOptions, databaseOptions, dbPath, dbPathString);
|
2021-06-09 02:56:53 +02:00
|
|
|
|
2021-06-25 23:47:53 +02:00
|
|
|
while (true) {
|
|
|
|
try {
|
|
|
|
// a factory method that returns a RocksDB instance
|
2021-06-27 15:40:56 +02:00
|
|
|
this.db = RocksDB.open(new DBOptions(rocksdbOptions),
|
2021-06-25 23:47:53 +02:00
|
|
|
dbPathString,
|
2021-07-10 20:52:01 +02:00
|
|
|
descriptors,
|
2021-06-25 23:47:53 +02:00
|
|
|
handles
|
|
|
|
);
|
|
|
|
break;
|
|
|
|
} catch (RocksDBException ex) {
|
|
|
|
switch (ex.getMessage()) {
|
|
|
|
case "Direct I/O is not supported by the specified DB." -> {
|
|
|
|
logger.warn(ex.getLocalizedMessage());
|
2021-06-27 15:40:56 +02:00
|
|
|
rocksdbOptions
|
2021-06-25 23:47:53 +02:00
|
|
|
.setUseDirectReads(false)
|
2021-06-27 15:06:48 +02:00
|
|
|
.setUseDirectIoForFlushAndCompaction(false)
|
2021-06-27 15:40:56 +02:00
|
|
|
.setAllowMmapReads(databaseOptions.allowMemoryMapping())
|
|
|
|
.setAllowMmapWrites(databaseOptions.allowMemoryMapping());
|
2021-06-25 23:47:53 +02:00
|
|
|
}
|
|
|
|
default -> throw ex;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
this.handles = new HashMap<>();
|
2021-06-27 15:40:56 +02:00
|
|
|
if (enableColumnsBug && !databaseOptions.inMemory()) {
|
2021-06-19 16:26:54 +02:00
|
|
|
for (int i = 0; i < columns.size(); i++) {
|
|
|
|
this.handles.put(columns.get(i), handles.get(i));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
handles: for (ColumnFamilyHandle handle : handles) {
|
|
|
|
for (Column column : columns) {
|
|
|
|
if (Arrays.equals(column.name().getBytes(StandardCharsets.US_ASCII), handle.getName())) {
|
|
|
|
this.handles.put(column, handle);
|
|
|
|
continue handles;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
2021-03-21 13:06:54 +01:00
|
|
|
// compactDb(db, handles);
|
2020-12-07 22:15:18 +01:00
|
|
|
flushDb(db, handles);
|
|
|
|
} catch (RocksDBException ex) {
|
|
|
|
throw new IOException(ex);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getDatabaseName() {
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
|
|
|
private void flushAndCloseDb(RocksDB db, List<ColumnFamilyHandle> handles)
|
|
|
|
throws RocksDBException {
|
|
|
|
flushDb(db, handles);
|
|
|
|
|
|
|
|
for (ColumnFamilyHandle handle : handles) {
|
2021-07-06 22:27:03 +02:00
|
|
|
try {
|
|
|
|
handle.close();
|
|
|
|
} catch (Exception ex) {
|
|
|
|
logger.error("Can't close column family", ex);
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
2021-07-01 21:19:52 +02:00
|
|
|
try {
|
|
|
|
db.closeE();
|
|
|
|
} catch (RocksDBException ex) {
|
|
|
|
if ("Cannot close DB with unreleased snapshot.".equals(ex.getMessage())) {
|
2021-07-10 20:52:01 +02:00
|
|
|
snapshotsHandles.forEach((id, snapshot) -> {
|
2021-07-01 21:19:52 +02:00
|
|
|
try {
|
|
|
|
db.releaseSnapshot(snapshot);
|
|
|
|
} catch (Exception ex2) {
|
|
|
|
// ignore exception
|
|
|
|
logger.debug("Failed to release snapshot " + id, ex2);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
db.closeE();
|
|
|
|
}
|
|
|
|
throw ex;
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
private void flushDb(RocksDB db, List<ColumnFamilyHandle> handles) throws RocksDBException {
|
|
|
|
// force flush the database
|
2021-07-06 22:27:03 +02:00
|
|
|
try (var flushOptions = new FlushOptions().setWaitForFlush(true).setAllowWriteStall(true)) {
|
|
|
|
db.flush(flushOptions);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
2021-07-06 22:27:03 +02:00
|
|
|
try (var flushOptions = new FlushOptions().setWaitForFlush(true).setAllowWriteStall(true)) {
|
|
|
|
db.flush(flushOptions, handles);
|
|
|
|
}
|
|
|
|
db.flushWal(true);
|
|
|
|
db.syncWal();
|
2020-12-07 22:15:18 +01:00
|
|
|
// end force flush
|
|
|
|
}
|
|
|
|
|
2021-04-03 19:09:06 +02:00
|
|
|
@SuppressWarnings("unused")
|
|
|
|
private void compactDb(RocksDB db, List<ColumnFamilyHandle> handles) {
|
2021-03-19 20:55:38 +01:00
|
|
|
// force compact the database
|
|
|
|
for (ColumnFamilyHandle cfh : handles) {
|
|
|
|
var t = new Thread(() -> {
|
2021-03-21 13:06:54 +01:00
|
|
|
int r = ThreadLocalRandom.current().nextInt();
|
|
|
|
var s = StopWatch.createStarted();
|
2021-03-19 20:55:38 +01:00
|
|
|
try {
|
|
|
|
// Range rangeToCompact = db.suggestCompactRange(cfh);
|
2021-03-21 13:06:54 +01:00
|
|
|
logger.info("Compacting range {}", r);
|
|
|
|
db.compactRange(cfh, null, null, new CompactRangeOptions()
|
|
|
|
.setAllowWriteStall(true)
|
|
|
|
.setExclusiveManualCompaction(true)
|
|
|
|
.setChangeLevel(false));
|
2021-03-19 20:55:38 +01:00
|
|
|
} catch (RocksDBException e) {
|
|
|
|
if ("Database shutdown".equalsIgnoreCase(e.getMessage())) {
|
|
|
|
logger.warn("Compaction cancelled: database shutdown");
|
|
|
|
} else {
|
|
|
|
logger.warn("Failed to compact range", e);
|
|
|
|
}
|
|
|
|
}
|
2021-03-21 13:06:54 +01:00
|
|
|
logger.info("Compacted range {} in {} milliseconds", r, s.getTime(TimeUnit.MILLISECONDS));
|
2021-03-19 20:55:38 +01:00
|
|
|
}, "Compaction");
|
|
|
|
t.setDaemon(true);
|
|
|
|
t.start();
|
|
|
|
}
|
|
|
|
// end force compact
|
|
|
|
}
|
|
|
|
|
2021-05-04 01:21:29 +02:00
|
|
|
@SuppressWarnings({"CommentedOutCode", "PointlessArithmeticExpression"})
|
2021-07-10 20:52:01 +02:00
|
|
|
private static Options openRocksDb(@Nullable Path path, DatabaseOptions databaseOptions) throws IOException {
|
2020-12-07 22:15:18 +01:00
|
|
|
// Get databases directory path
|
2021-07-10 20:52:01 +02:00
|
|
|
Path databasesDirPath;
|
|
|
|
if (path != null) {
|
|
|
|
databasesDirPath = path.toAbsolutePath().getParent();
|
|
|
|
// Create base directories
|
|
|
|
if (Files.notExists(databasesDirPath)) {
|
|
|
|
Files.createDirectories(databasesDirPath);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
databasesDirPath = null;
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// the Options class contains a set of configurable DB options
|
|
|
|
// that determines the behaviour of the database.
|
|
|
|
var options = new Options();
|
|
|
|
options.setCreateIfMissing(true);
|
2021-07-06 22:27:03 +02:00
|
|
|
options.setCreateMissingColumnFamilies(true);
|
2020-12-07 22:15:18 +01:00
|
|
|
options.setCompactionStyle(CompactionStyle.LEVEL);
|
|
|
|
options.setTargetFileSizeBase(64 * 1024 * 1024); // 64MiB sst file
|
2021-05-11 21:59:05 +02:00
|
|
|
options.setTargetFileSizeMultiplier(2); // Each level is 2 times the previous level
|
|
|
|
options.setCompressionPerLevel(List.of(CompressionType.NO_COMPRESSION,
|
|
|
|
CompressionType.SNAPPY_COMPRESSION,
|
|
|
|
CompressionType.SNAPPY_COMPRESSION
|
|
|
|
));
|
|
|
|
//options.setMaxBytesForLevelBase(4 * 256 * 1024 * 1024); // 4 times the sst file
|
2020-12-07 22:15:18 +01:00
|
|
|
options.setManualWalFlush(false);
|
|
|
|
options.setMinWriteBufferNumberToMerge(3);
|
|
|
|
options.setMaxWriteBufferNumber(4);
|
|
|
|
options.setAvoidFlushDuringShutdown(false); // Flush all WALs during shutdown
|
|
|
|
options.setAvoidFlushDuringRecovery(false); // Flush all WALs during startup
|
2021-06-27 15:40:56 +02:00
|
|
|
options.setWalRecoveryMode(databaseOptions.absoluteConsistency()
|
2021-02-01 11:00:27 +01:00
|
|
|
? WALRecoveryMode.AbsoluteConsistency
|
|
|
|
: WALRecoveryMode.PointInTimeRecovery); // Crash if the WALs are corrupted.Default: TolerateCorruptedTailRecords
|
2020-12-07 22:15:18 +01:00
|
|
|
options.setDeleteObsoleteFilesPeriodMicros(20 * 1000000); // 20 seconds
|
|
|
|
options.setPreserveDeletes(false);
|
|
|
|
options.setKeepLogFileNum(10);
|
2021-03-20 12:41:11 +01:00
|
|
|
options.setAllowFAllocate(true);
|
2021-05-11 21:59:05 +02:00
|
|
|
options.setRateLimiter(new RateLimiter(10L * 1024L * 1024L)); // 10MiB/s max compaction write speed
|
2021-07-10 20:52:01 +02:00
|
|
|
|
|
|
|
List<DbPath> paths = List.of(new DbPath(databasesDirPath.resolve(path.getFileName() + "_hot"),
|
2021-06-09 02:56:53 +02:00
|
|
|
10L * 1024L * 1024L * 1024L), // 10GiB
|
2021-05-11 21:59:05 +02:00
|
|
|
new DbPath(databasesDirPath.resolve(path.getFileName() + "_cold"),
|
2021-05-12 19:02:51 +02:00
|
|
|
100L * 1024L * 1024L * 1024L), // 100GiB
|
2021-05-11 21:59:05 +02:00
|
|
|
new DbPath(databasesDirPath.resolve(path.getFileName() + "_colder"),
|
2021-06-09 02:56:53 +02:00
|
|
|
600L * 1024L * 1024L * 1024L)); // 600GiB
|
|
|
|
options.setDbPaths(paths);
|
|
|
|
options.setCfPaths(paths);
|
2020-12-07 22:15:18 +01:00
|
|
|
// Direct I/O parameters. Removed because they use too much disk.
|
|
|
|
//options.setUseDirectReads(true);
|
|
|
|
//options.setUseDirectIoForFlushAndCompaction(true);
|
|
|
|
//options.setWritableFileMaxBufferSize(1024 * 1024); // 1MB by default
|
2021-03-22 20:02:19 +01:00
|
|
|
//options.setCompactionReadaheadSize(2 * 1024 * 1024); // recommend at least 2MB
|
2021-03-20 12:41:11 +01:00
|
|
|
final BlockBasedTableConfig tableOptions = new BlockBasedTableConfig();
|
2021-06-27 15:40:56 +02:00
|
|
|
if (databaseOptions.lowMemory()) {
|
2020-12-07 22:15:18 +01:00
|
|
|
// LOW MEMORY
|
|
|
|
options
|
2021-05-18 01:10:30 +02:00
|
|
|
.setLevelCompactionDynamicLevelBytes(false)
|
|
|
|
.setBytesPerSync(0) // default
|
|
|
|
.setWalBytesPerSync(0) // default
|
2020-12-07 22:15:18 +01:00
|
|
|
.setIncreaseParallelism(1)
|
2021-05-05 15:16:32 +02:00
|
|
|
.setMaxOpenFiles(15)
|
2020-12-07 22:15:18 +01:00
|
|
|
.optimizeLevelStyleCompaction(1024 * 1024) // 1MiB of ram will be used for level style compaction
|
|
|
|
.setWriteBufferSize(1024 * 1024) // 1MB
|
2021-05-18 01:10:30 +02:00
|
|
|
.setWalTtlSeconds(0)
|
|
|
|
.setWalSizeLimitMB(0) // 16MB
|
|
|
|
.setMaxTotalWalSize(0) // automatic
|
|
|
|
;
|
|
|
|
tableOptions
|
2021-07-17 11:52:08 +02:00
|
|
|
.setIndexType(IndexType.kTwoLevelIndexSearch)
|
|
|
|
.setPartitionFilters(true)
|
|
|
|
.setMetadataBlockSize(4096)
|
|
|
|
.setBlockCache(new ClockCache(8L * 1024L * 1024L)) // 8MiB
|
|
|
|
.setCacheIndexAndFilterBlocks(true)
|
|
|
|
.setCacheIndexAndFilterBlocksWithHighPriority(true)
|
|
|
|
.setPinL0FilterAndIndexBlocksInCache(true)
|
2020-12-07 22:15:18 +01:00
|
|
|
;
|
2021-07-17 11:52:08 +02:00
|
|
|
options.setWriteBufferManager(new WriteBufferManager(8L * 1024L * 1024L, new ClockCache(8L * 1024L * 1024L))); // 8MiB
|
|
|
|
|
|
|
|
if (databaseOptions.useDirectIO()) {
|
|
|
|
options
|
|
|
|
// Option to enable readahead in compaction
|
|
|
|
// If not set, it will be set to 2MB internally
|
|
|
|
.setCompactionReadaheadSize(2 * 1024 * 1024) // recommend at least 2MB
|
|
|
|
// Option to tune write buffer for direct writes
|
|
|
|
.setWritableFileMaxBufferSize(1024 * 1024)
|
|
|
|
;
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
} else {
|
|
|
|
// HIGH MEMORY
|
|
|
|
options
|
2021-05-18 01:10:30 +02:00
|
|
|
.setLevelCompactionDynamicLevelBytes(true)
|
2020-12-07 22:15:18 +01:00
|
|
|
.setAllowConcurrentMemtableWrite(true)
|
|
|
|
.setEnableWriteThreadAdaptiveYield(true)
|
|
|
|
.setIncreaseParallelism(Runtime.getRuntime().availableProcessors())
|
2021-05-04 01:21:29 +02:00
|
|
|
.setBytesPerSync(1 * 1024 * 1024) // 1MiB
|
2020-12-07 22:15:18 +01:00
|
|
|
.setWalBytesPerSync(10 * 1024 * 1024)
|
2021-07-17 11:52:08 +02:00
|
|
|
.setMaxOpenFiles(30)
|
2020-12-07 22:15:18 +01:00
|
|
|
.optimizeLevelStyleCompaction(
|
|
|
|
128 * 1024 * 1024) // 128MiB of ram will be used for level style compaction
|
2021-03-20 12:41:11 +01:00
|
|
|
.setWriteBufferSize(64 * 1024 * 1024) // 64MB
|
2021-05-18 01:10:30 +02:00
|
|
|
.setWalTtlSeconds(30) // flush wal after 30 seconds
|
2020-12-07 22:15:18 +01:00
|
|
|
.setWalSizeLimitMB(1024) // 1024MB
|
2021-03-20 12:41:11 +01:00
|
|
|
.setMaxTotalWalSize(2L * 1024L * 1024L * 1024L) // 2GiB max wal directory size
|
2020-12-07 22:15:18 +01:00
|
|
|
;
|
2021-05-18 01:10:30 +02:00
|
|
|
tableOptions
|
2021-07-17 11:52:08 +02:00
|
|
|
.setIndexType(IndexType.kTwoLevelIndexSearch)
|
|
|
|
.setPartitionFilters(true)
|
|
|
|
.setMetadataBlockSize(4096)
|
|
|
|
.setBlockCache(new ClockCache(512L * 1024L * 1024L)) // 512MiB
|
2021-05-18 01:10:30 +02:00
|
|
|
.setCacheIndexAndFilterBlocks(true)
|
2021-07-17 11:52:08 +02:00
|
|
|
.setCacheIndexAndFilterBlocksWithHighPriority(true)
|
2021-05-18 01:10:30 +02:00
|
|
|
.setPinL0FilterAndIndexBlocksInCache(true)
|
|
|
|
;
|
|
|
|
final BloomFilter bloomFilter = new BloomFilter(10, false);
|
|
|
|
tableOptions.setOptimizeFiltersForMemory(true);
|
|
|
|
tableOptions.setFilterPolicy(bloomFilter);
|
2021-07-17 11:52:08 +02:00
|
|
|
options.setWriteBufferManager(new WriteBufferManager(256L * 1024L * 1024L, new ClockCache(128L * 1024L * 1024L))); // 128MiB
|
2021-06-25 23:47:53 +02:00
|
|
|
|
2021-06-27 15:40:56 +02:00
|
|
|
if (databaseOptions.useDirectIO()) {
|
2021-06-25 23:47:53 +02:00
|
|
|
options
|
|
|
|
// Option to enable readahead in compaction
|
|
|
|
// If not set, it will be set to 2MB internally
|
2021-07-17 11:52:08 +02:00
|
|
|
.setCompactionReadaheadSize(4 * 1024 * 1024) // recommend at least 2MB
|
2021-06-25 23:47:53 +02:00
|
|
|
// Option to tune write buffer for direct writes
|
2021-07-17 11:52:08 +02:00
|
|
|
.setWritableFileMaxBufferSize(4 * 1024 * 1024)
|
2021-06-25 23:47:53 +02:00
|
|
|
;
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
2021-07-17 11:52:08 +02:00
|
|
|
if (databaseOptions.useDirectIO()) {
|
|
|
|
options
|
|
|
|
.setAllowMmapReads(false)
|
|
|
|
.setAllowMmapWrites(false)
|
|
|
|
.setUseDirectReads(true)
|
|
|
|
;
|
|
|
|
} else {
|
|
|
|
options
|
|
|
|
.setAllowMmapReads(databaseOptions.allowMemoryMapping())
|
|
|
|
.setAllowMmapWrites(databaseOptions.allowMemoryMapping());
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!databaseOptions.allowMemoryMapping()) {
|
|
|
|
options.setUseDirectIoForFlushAndCompaction(true);
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-05-04 01:21:29 +02:00
|
|
|
tableOptions.setBlockSize(16 * 1024); // 16MiB
|
2020-12-07 22:15:18 +01:00
|
|
|
options.setTableFormatConfig(tableOptions);
|
2021-05-04 01:21:29 +02:00
|
|
|
options.setCompactionPriority(CompactionPriority.MinOverlappingRatio);
|
2020-12-07 22:15:18 +01:00
|
|
|
|
|
|
|
return options;
|
|
|
|
}
|
|
|
|
|
2021-04-30 19:15:04 +02:00
|
|
|
private void createIfNotExists(List<ColumnFamilyDescriptor> descriptors,
|
|
|
|
Options options,
|
2021-06-27 15:40:56 +02:00
|
|
|
DatabaseOptions databaseOptions,
|
2021-04-30 19:15:04 +02:00
|
|
|
Path dbPath,
|
|
|
|
String dbPathString) throws RocksDBException {
|
2021-06-27 15:40:56 +02:00
|
|
|
if (databaseOptions.inMemory()) {
|
2021-04-30 19:15:04 +02:00
|
|
|
return;
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
if (Files.notExists(dbPath)) {
|
|
|
|
// Check if handles are all different
|
|
|
|
var descriptorsSet = new HashSet<>(descriptors);
|
|
|
|
if (descriptorsSet.size() != descriptors.size()) {
|
|
|
|
throw new IllegalArgumentException("Descriptors must be unique!");
|
|
|
|
}
|
|
|
|
|
|
|
|
List<ColumnFamilyDescriptor> descriptorsToCreate = new LinkedList<>(descriptors);
|
|
|
|
descriptorsToCreate
|
|
|
|
.removeIf((cf) -> Arrays.equals(cf.getName(), DEFAULT_COLUMN_FAMILY.getName()));
|
|
|
|
|
2021-01-30 00:24:55 +01:00
|
|
|
/*
|
|
|
|
SkipStatsUpdateOnDbOpen = true because this RocksDB.open session is used only to add just some columns
|
2020-12-07 22:15:18 +01:00
|
|
|
*/
|
|
|
|
//var dbOptionsFastLoadSlowEdit = options.setSkipStatsUpdateOnDbOpen(true);
|
|
|
|
|
|
|
|
LinkedList<ColumnFamilyHandle> handles = new LinkedList<>();
|
|
|
|
|
|
|
|
this.db = RocksDB.open(options, dbPathString);
|
|
|
|
for (ColumnFamilyDescriptor columnFamilyDescriptor : descriptorsToCreate) {
|
|
|
|
handles.add(db.createColumnFamily(columnFamilyDescriptor));
|
|
|
|
}
|
|
|
|
|
2021-06-27 15:40:56 +02:00
|
|
|
flushAndCloseDb(db, handles);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-01-31 15:47:48 +01:00
|
|
|
public Mono<LLLocalSingleton> getSingleton(byte[] singletonListColumnName, byte[] name, byte[] defaultValue) {
|
|
|
|
return Mono
|
|
|
|
.fromCallable(() -> new LLLocalSingleton(db,
|
2021-06-19 16:26:54 +02:00
|
|
|
getCfh(singletonListColumnName),
|
2021-01-31 15:47:48 +01:00
|
|
|
(snapshot) -> snapshotsHandles.get(snapshot.getSequenceNumber()),
|
|
|
|
LLLocalKeyValueDatabase.this.name,
|
|
|
|
name,
|
2021-02-11 22:27:43 +01:00
|
|
|
dbScheduler,
|
2021-01-31 15:47:48 +01:00
|
|
|
defaultValue
|
|
|
|
))
|
2021-03-04 22:01:50 +01:00
|
|
|
.onErrorMap(cause -> new IOException("Failed to read " + Arrays.toString(name), cause))
|
2021-02-01 02:21:53 +01:00
|
|
|
.subscribeOn(dbScheduler);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-02-13 01:31:24 +01:00
|
|
|
public Mono<LLLocalDictionary> getDictionary(byte[] columnName, UpdateMode updateMode) {
|
2021-01-31 15:47:48 +01:00
|
|
|
return Mono
|
2021-06-26 02:35:33 +02:00
|
|
|
.fromCallable(() -> new LLLocalDictionary(
|
|
|
|
allocator,
|
|
|
|
db,
|
|
|
|
getCfh(columnName),
|
|
|
|
name,
|
|
|
|
Column.toString(columnName),
|
|
|
|
dbScheduler,
|
|
|
|
(snapshot) -> snapshotsHandles.get(snapshot.getSequenceNumber()),
|
2021-06-29 23:31:02 +02:00
|
|
|
updateMode,
|
|
|
|
databaseOptions
|
2021-06-26 02:35:33 +02:00
|
|
|
))
|
2021-02-01 02:21:53 +01:00
|
|
|
.subscribeOn(dbScheduler);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
2021-06-19 16:26:54 +02:00
|
|
|
private ColumnFamilyHandle getCfh(byte[] columnName) throws RocksDBException {
|
|
|
|
ColumnFamilyHandle cfh = handles.get(Column.special(Column.toString(columnName)));
|
2021-06-27 16:52:45 +02:00
|
|
|
//noinspection RedundantIfStatement
|
2021-07-18 19:37:24 +02:00
|
|
|
if (databaseOptions.enableDbAssertionsWhenUsingAssertions()) {
|
|
|
|
if (!enableColumnsBug) {
|
|
|
|
assert Arrays.equals(cfh.getName(), columnName);
|
|
|
|
}
|
2021-06-19 16:26:54 +02:00
|
|
|
}
|
|
|
|
return cfh;
|
|
|
|
}
|
|
|
|
|
2021-06-27 15:40:56 +02:00
|
|
|
public DatabaseOptions getDatabaseOptions() {
|
|
|
|
return databaseOptions;
|
|
|
|
}
|
|
|
|
|
2020-12-07 22:15:18 +01:00
|
|
|
@Override
|
2021-01-31 15:47:48 +01:00
|
|
|
public Mono<Long> getProperty(String propertyName) {
|
|
|
|
return Mono.fromCallable(() -> db.getAggregatedLongProperty(propertyName))
|
2021-03-04 22:01:50 +01:00
|
|
|
.onErrorMap(cause -> new IOException("Failed to read " + propertyName, cause))
|
2021-02-01 02:21:53 +01:00
|
|
|
.subscribeOn(dbScheduler);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
2021-06-27 15:06:48 +02:00
|
|
|
@Override
|
|
|
|
public Mono<Void> verifyChecksum() {
|
|
|
|
return Mono
|
|
|
|
.<Void>fromCallable(() -> {
|
|
|
|
db.verifyChecksum();
|
|
|
|
return null;
|
|
|
|
})
|
|
|
|
.onErrorMap(cause -> new IOException("Failed to verify checksum of database \""
|
|
|
|
+ getDatabaseName() + "\"", cause))
|
|
|
|
.subscribeOn(dbScheduler);
|
|
|
|
}
|
|
|
|
|
2021-05-03 21:41:51 +02:00
|
|
|
@Override
|
|
|
|
public ByteBufAllocator getAllocator() {
|
|
|
|
return allocator;
|
|
|
|
}
|
|
|
|
|
2020-12-07 22:15:18 +01:00
|
|
|
@Override
|
2021-01-30 01:42:37 +01:00
|
|
|
public Mono<LLSnapshot> takeSnapshot() {
|
|
|
|
return Mono
|
|
|
|
.fromCallable(() -> {
|
|
|
|
var snapshot = db.getSnapshot();
|
|
|
|
long currentSnapshotSequenceNumber = nextSnapshotNumbers.getAndIncrement();
|
|
|
|
this.snapshotsHandles.put(currentSnapshotSequenceNumber, snapshot);
|
|
|
|
return new LLSnapshot(currentSnapshotSequenceNumber);
|
|
|
|
})
|
2021-02-01 02:21:53 +01:00
|
|
|
.subscribeOn(dbScheduler);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-01-30 01:42:37 +01:00
|
|
|
public Mono<Void> releaseSnapshot(LLSnapshot snapshot) {
|
|
|
|
return Mono
|
|
|
|
.<Void>fromCallable(() -> {
|
|
|
|
Snapshot dbSnapshot = this.snapshotsHandles.remove(snapshot.getSequenceNumber());
|
|
|
|
if (dbSnapshot == null) {
|
|
|
|
throw new IOException("Snapshot " + snapshot.getSequenceNumber() + " not found!");
|
|
|
|
}
|
|
|
|
db.releaseSnapshot(dbSnapshot);
|
|
|
|
return null;
|
|
|
|
})
|
2021-02-01 02:21:53 +01:00
|
|
|
.subscribeOn(dbScheduler);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-01-31 19:52:47 +01:00
|
|
|
public Mono<Void> close() {
|
|
|
|
return Mono
|
|
|
|
.<Void>fromCallable(() -> {
|
|
|
|
try {
|
|
|
|
flushAndCloseDb(db, new ArrayList<>(handles.values()));
|
|
|
|
deleteUnusedOldLogFiles();
|
|
|
|
} catch (RocksDBException e) {
|
|
|
|
throw new IOException(e);
|
|
|
|
}
|
|
|
|
return null;
|
|
|
|
})
|
2021-03-04 22:01:50 +01:00
|
|
|
.onErrorMap(cause -> new IOException("Failed to close", cause))
|
2021-02-01 02:21:53 +01:00
|
|
|
.subscribeOn(dbScheduler);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Call this method ONLY AFTER flushing completely a db and closing it!
|
|
|
|
*/
|
2021-01-30 00:24:55 +01:00
|
|
|
@SuppressWarnings("unused")
|
2020-12-07 22:15:18 +01:00
|
|
|
private void deleteUnusedOldLogFiles() {
|
|
|
|
Path basePath = dbPath;
|
|
|
|
try {
|
|
|
|
Files
|
|
|
|
.walk(basePath, 1)
|
|
|
|
.filter(p -> !p.equals(basePath))
|
|
|
|
.filter(p -> {
|
|
|
|
var fileName = p.getFileName().toString();
|
|
|
|
if (fileName.startsWith("LOG.old.")) {
|
|
|
|
var parts = fileName.split("\\.");
|
|
|
|
if (parts.length == 3) {
|
|
|
|
try {
|
|
|
|
long nameSuffix = Long.parseUnsignedLong(parts[2]);
|
|
|
|
return true;
|
|
|
|
} catch (NumberFormatException ex) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (fileName.endsWith(".log")) {
|
|
|
|
var parts = fileName.split("\\.");
|
|
|
|
if (parts.length == 2) {
|
|
|
|
try {
|
|
|
|
int name = Integer.parseUnsignedInt(parts[0]);
|
|
|
|
return true;
|
|
|
|
} catch (NumberFormatException ex) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
})
|
|
|
|
.filter(p -> {
|
|
|
|
try {
|
|
|
|
BasicFileAttributes attrs = Files.readAttributes(p, BasicFileAttributes.class);
|
|
|
|
if (attrs.isRegularFile() && !attrs.isSymbolicLink() && !attrs.isDirectory()) {
|
|
|
|
long ctime = attrs.creationTime().toMillis();
|
|
|
|
long atime = attrs.lastAccessTime().toMillis();
|
|
|
|
long mtime = attrs.lastModifiedTime().toMillis();
|
|
|
|
long lastTime = Math.max(Math.max(ctime, atime), mtime);
|
|
|
|
long safeTime;
|
|
|
|
if (p.getFileName().toString().startsWith("LOG.old.")) {
|
|
|
|
safeTime = System.currentTimeMillis() - Duration.ofHours(24).toMillis();
|
|
|
|
} else {
|
|
|
|
safeTime = System.currentTimeMillis() - Duration.ofHours(12).toMillis();
|
|
|
|
}
|
|
|
|
if (lastTime < safeTime) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (IOException ex) {
|
2021-03-19 20:55:38 +01:00
|
|
|
logger.error("Error when deleting unused log files", ex);
|
2020-12-07 22:15:18 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
})
|
|
|
|
.forEach(path -> {
|
|
|
|
try {
|
|
|
|
Files.deleteIfExists(path);
|
|
|
|
System.out.println("Deleted log file \"" + path + "\"");
|
|
|
|
} catch (IOException e) {
|
|
|
|
e.printStackTrace();
|
|
|
|
}
|
|
|
|
});
|
|
|
|
} catch (IOException ex) {
|
|
|
|
ex.printStackTrace();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|