CavalliumDBEngine/src/main/java/it/cavallium/dbengine/database/disk/LLLocalKeyValueDatabase.java

1672 lines
58 KiB
Java
Raw Normal View History

2020-12-07 22:15:18 +01:00
package it.cavallium.dbengine.database.disk;
2021-09-10 12:13:52 +02:00
import static it.cavallium.dbengine.database.LLUtils.MARKER_ROCKSDB;
import static it.cavallium.dbengine.database.LLUtils.mapList;
import static it.cavallium.dbengine.utils.StreamUtils.collect;
import static it.cavallium.dbengine.utils.StreamUtils.iterating;
2022-06-09 00:49:08 +02:00
import static java.lang.Boolean.parseBoolean;
2022-05-04 01:21:56 +02:00
import static java.util.Objects.requireNonNull;
2022-04-04 22:55:28 +02:00
import static org.rocksdb.ColumnFamilyOptionsInterface.DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET;
2021-09-10 12:13:52 +02:00
import io.micrometer.core.instrument.MeterRegistry;
2022-01-15 20:00:10 +01:00
import io.micrometer.core.instrument.Tag;
2021-12-30 18:20:56 +01:00
import io.micrometer.core.instrument.Timer;
2023-04-20 10:20:17 +02:00
import it.cavallium.datagen.nativedata.NullableString;
2022-08-15 23:07:17 +02:00
import it.cavallium.dbengine.client.Backuppable;
2022-01-15 20:00:10 +01:00
import it.cavallium.dbengine.client.MemoryStats;
2022-05-04 01:21:56 +02:00
import it.cavallium.dbengine.database.ColumnProperty;
2022-03-02 12:34:30 +01:00
import it.cavallium.dbengine.database.ColumnUtils;
2021-01-30 00:24:55 +01:00
import it.cavallium.dbengine.database.LLKeyValueDatabase;
import it.cavallium.dbengine.database.LLSnapshot;
2022-01-26 14:22:54 +01:00
import it.cavallium.dbengine.database.LLUtils;
2022-05-04 01:21:56 +02:00
import it.cavallium.dbengine.database.RocksDBLongProperty;
import it.cavallium.dbengine.database.RocksDBMapProperty;
import it.cavallium.dbengine.database.RocksDBStringProperty;
2022-04-09 02:45:42 +02:00
import it.cavallium.dbengine.database.TableWithProperties;
2021-02-13 01:31:24 +01:00
import it.cavallium.dbengine.database.UpdateMode;
2022-03-02 12:34:30 +01:00
import it.cavallium.dbengine.rpc.current.data.Column;
2022-04-08 14:32:47 +02:00
import it.cavallium.dbengine.rpc.current.data.ColumnOptions;
2022-03-22 11:50:30 +01:00
import it.cavallium.dbengine.rpc.current.data.DatabaseLevel;
2022-03-02 12:34:30 +01:00
import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
import it.cavallium.dbengine.rpc.current.data.DatabaseVolume;
2022-04-08 14:32:47 +02:00
import it.cavallium.dbengine.rpc.current.data.NamedColumnOptions;
2022-08-16 19:50:53 +02:00
import it.cavallium.dbengine.rpc.current.data.NoFilter;
2020-12-07 22:15:18 +01:00
import java.io.File;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
2020-12-07 22:15:18 +01:00
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.CompletionException;
2020-12-07 22:15:18 +01:00
import java.util.concurrent.ConcurrentHashMap;
2021-03-21 13:06:54 +01:00
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
2020-12-07 22:15:18 +01:00
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.StampedLock;
2022-04-08 14:32:47 +02:00
import java.util.stream.Collectors;
2022-04-15 16:49:01 +02:00
import java.util.stream.Stream;
2021-03-21 13:06:54 +01:00
import org.apache.commons.lang3.time.StopWatch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
2023-05-22 23:08:37 +02:00
import org.jetbrains.annotations.NotNull;
2021-07-10 20:52:01 +02:00
import org.jetbrains.annotations.Nullable;
import org.rocksdb.AbstractImmutableNativeReference;
2020-12-07 22:15:18 +01:00
import org.rocksdb.BlockBasedTableConfig;
import org.rocksdb.BloomFilter;
2022-01-12 16:18:31 +01:00
import org.rocksdb.Cache;
2022-03-19 00:08:23 +01:00
import org.rocksdb.ChecksumType;
2020-12-07 22:15:18 +01:00
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyHandle;
2022-03-10 02:38:57 +01:00
import org.rocksdb.ColumnFamilyOptions;
2021-03-19 20:55:38 +01:00
import org.rocksdb.CompactRangeOptions;
2021-05-04 01:21:29 +02:00
import org.rocksdb.CompactionPriority;
2022-01-12 16:18:31 +01:00
import org.rocksdb.CompressionOptions;
2020-12-07 22:15:18 +01:00
import org.rocksdb.CompressionType;
import org.rocksdb.DBOptions;
2022-04-19 23:23:32 +02:00
import org.rocksdb.DataBlockIndexType;
2020-12-07 22:15:18 +01:00
import org.rocksdb.DbPath;
import org.rocksdb.Env;
2020-12-07 22:15:18 +01:00
import org.rocksdb.FlushOptions;
2021-07-17 11:52:08 +02:00
import org.rocksdb.IndexType;
2022-03-10 02:38:57 +01:00
import org.rocksdb.InfoLogLevel;
import org.rocksdb.IngestExternalFileOptions;
import org.rocksdb.LiveFileMetaData;
2021-10-17 19:52:43 +02:00
import org.rocksdb.OptimisticTransactionDB;
import org.rocksdb.PersistentCache;
2023-02-24 00:18:02 +01:00
import org.rocksdb.PlainTableConfig;
2022-09-26 21:56:56 +02:00
import org.rocksdb.PrepopulateBlobCache;
2020-12-07 22:15:18 +01:00
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.Snapshot;
2022-05-09 22:08:54 +02:00
import org.rocksdb.Statistics;
import org.rocksdb.StatsLevel;
2023-02-24 00:18:02 +01:00
import org.rocksdb.TableFormatConfig;
2022-05-09 22:08:54 +02:00
import org.rocksdb.TickerType;
2021-10-17 19:52:43 +02:00
import org.rocksdb.TransactionDB;
2021-12-27 18:44:54 +01:00
import org.rocksdb.TransactionDBOptions;
import org.rocksdb.TxnDBWritePolicy;
2020-12-07 22:15:18 +01:00
import org.rocksdb.WALRecoveryMode;
2021-03-20 12:41:11 +01:00
import org.rocksdb.WriteBufferManager;
2022-01-12 16:18:31 +01:00
import org.rocksdb.util.SizeUnit;
2020-12-07 22:15:18 +01:00
2022-08-15 23:07:17 +02:00
public class LLLocalKeyValueDatabase extends Backuppable implements LLKeyValueDatabase {
2020-12-07 22:15:18 +01:00
// Compile-time switch; not referenced in the visible part of this file.
private static final boolean DELETE_LOG_FILES = false;
2023-05-23 00:20:14 +02:00
// When true, the constructor skips the hand-tuned overrides guarded by !FOLLOW_ROCKSDB_OPTIMIZATIONS.
private static final boolean FOLLOW_ROCKSDB_OPTIMIZATIONS = true;
2022-06-17 01:00:45 +02:00
// System property "it.cavallium.dbengine.clockcache.enable" (default "false"); selects the cache factory below.
private static final boolean USE_CLOCK_CACHE
2022-06-18 00:27:02 +02:00
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.clockcache.enable", "false"));
2023-09-03 01:33:58 +02:00
// System property "it.cavallium.dbengine.checks.paranoid" (default "true"); its consumer is outside the visible code.
private static final boolean PARANOID_CHECKS
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.checks.paranoid", "true"));
2023-09-26 01:45:28 +02:00
// System property "it.cavallium.dbengine.checks.compression" (default "false"); passed to
// BlockBasedTableConfig.setVerifyCompression in the constructor.
private static final boolean VERIFY_COMPRESSION
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.checks.compression", "false"));
// System property "it.cavallium.dbengine.checks.filesize" (default "false"); its consumer is outside the visible code.
private static final boolean VERIFY_FILE_SIZE
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.checks.filesize", "false"));
// System property "it.cavallium.dbengine.checks.paranoidfilechecks" (default "false"); passed to
// ColumnFamilyOptions.setParanoidFileChecks in the constructor.
private static final boolean PARANOID_FILE_CHECKS
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.checks.paranoidfilechecks", "false"));
// System property "it.cavallium.dbengine.checks.forcecolumnfamilyconsistencychecks" (default "true"); passed to
// ColumnFamilyOptions.setForceConsistencyChecks in the constructor.
private static final boolean FORCE_COLUMN_FAMILY_CONSISTENCY_CHECKS
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.checks.forcecolumnfamilyconsistencychecks", "true"));
2023-09-26 02:43:06 +02:00
// Log level decoded from the numeric system property "it.cavallium.dbengine.log.levelcode";
// defaults to the byte value of InfoLogLevel.WARN_LEVEL. Its consumer is outside the visible code.
private static final InfoLogLevel LOG_LEVEL = InfoLogLevel.getInfoLogLevel(Byte.parseByte(System.getProperty("it.cavallium.dbengine.log.levelcode", "" + InfoLogLevel.WARN_LEVEL.getValue())));
2022-06-17 01:00:45 +02:00
// Cache factory chosen once at class-load time from USE_CLOCK_CACHE.
private static final CacheFactory CACHE_FACTORY = USE_CLOCK_CACHE ? new ClockCacheFactory() : new LRUCacheFactory();
2023-05-23 00:20:14 +02:00
// System property "it.cavallium.dbengine.snapshots.allow" (default "true"); its consumer is outside the visible code.
private static final boolean ALLOW_SNAPSHOTS = Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.snapshots.allow", "true"));
2020-12-07 22:15:18 +01:00
// Load the RocksDB native library when this class is initialized.
static {
RocksDB.loadLibrary();
}
// Class logger; also handed to RocksLog4jLogger in the constructor.
protected static final Logger logger = LogManager.getLogger(LLLocalKeyValueDatabase.class);
2020-12-07 22:15:18 +01:00
// Registry on which the constructor registers the snapshot timer and the RocksDB gauges.
private final MeterRegistry meterRegistry;
2021-06-27 15:40:56 +02:00
2021-12-30 18:20:56 +01:00
// Timer registered as "db.snapshot.timer", tagged with the database name.
private final Timer snapshotTime;
2021-06-27 15:40:56 +02:00
// Configurations
2020-12-07 22:15:18 +01:00
// Database directory, built in the constructor from the absolute parent of the given path plus its file name.
private final Path dbPath;
// Database name; used as the "db.name" meter tag and in log messages.
private final String name;
2021-06-27 15:40:56 +02:00
// Options this database was opened with.
private final DatabaseOptions databaseOptions;
2021-06-19 16:26:54 +02:00
// True when the extra flag "enableColumnBug" equals "true"; selects index-based column/handle pairing in the constructor.
private final boolean enableColumnsBug;
2022-05-22 16:48:08 +02:00
// Tracker for the native RocksDB objects created while opening the database.
private final RocksDBRefs refs = new RocksDBRefs();
2021-10-20 01:51:34 +02:00
// RocksDB instance; assigned inside the constructor's open/retry loop.
private RocksDB db;
2022-05-09 22:08:54 +02:00
// Statistics object; null unless the system property "it.cavallium.dbengine.stats.level" is set.
private Statistics statistics;
// Block cache taken from the options returned by openRocksDb.
private Cache standardCache;
// Column -> column family handle mapping, filled by the constructor after the database is opened.
private final Map<Column, ColumnFamilyHandle> handles;
// Persistent caches created so far, keyed by persistent cache id (see resolvePersistentCache).
private final HashMap<String, PersistentCache> persistentCaches;
// Snapshot handles keyed by a Long; presumably the snapshot sequence number — confirm against the snapshot methods.
private final ConcurrentHashMap<Long, Snapshot> snapshotsHandles = new ConcurrentHashMap<>();
2020-12-07 22:15:18 +01:00
// Counter starting at 1; presumably the source of snapshot numbers — confirm against the snapshot methods.
private final AtomicLong nextSnapshotNumbers = new AtomicLong(1);
// Lock whose read side is held by the public operations below while they use the database.
// NOTE(review): the write side is presumably taken by close(), which is outside the visible code.
private final StampedLock closeLock = new StampedLock();
2022-06-08 18:52:15 +02:00
// Not read or written in the visible code.
private volatile boolean closeRequested = false;
2022-01-15 20:00:10 +01:00
// Checked by ensureOpen(), which throws IllegalStateException when this is true.
private volatile boolean closed = false;
2020-12-07 22:15:18 +01:00
2021-06-27 15:40:56 +02:00
/**
 * Opens the RocksDB instance backing this database and configures one column family per requested column.
 *
 * <p>Depending on {@code databaseOptions}, the database is opened read-only (when {@code openAsSecondary()}
 * is set), as an {@link OptimisticTransactionDB} (when {@code optimistic()} is set), or as a
 * {@link TransactionDB}. If opening fails because direct I/O is not supported, direct I/O is switched off and
 * the open is retried. Unless opened as secondary, the database is flushed once after opening.
 *
 * @param meterRegistry   registry on which the snapshot timer and the RocksDB gauges are registered
 * @param name            database name, used as a meter tag and in log messages
 * @param inMemory        when true, a {@link PlainTableConfig} with a fixed-length prefix extractor is used
 *                        instead of a {@link BlockBasedTableConfig}
 * @param path            database path; must not be null by the time the database is opened
 * @param columns         columns to open; a "default" column is prepended when missing, unless the
 *                        "enableColumnBug" extra flag is set
 * @param handles         list handed to the RocksDB open call, which is expected to fill it with one handle
 *                        per descriptor
 * @param databaseOptions database-wide and per-column options
 * @throws IllegalArgumentException if {@code databaseOptions.columnOptions()} names a column not in {@code columns}
 * @throws NullPointerException     if {@code path} is null
 * @throws DBException              if RocksDB fails while configuring, opening or flushing the database
 */
@SuppressWarnings("SwitchStatementWithTooFewBranches")
public LLLocalKeyValueDatabase(@NotNull MeterRegistry meterRegistry,
		String name,
		boolean inMemory,
		@Nullable Path path,
		List<Column> columns,
		List<ColumnFamilyHandle> handles,
		DatabaseOptions databaseOptions) {
	this.name = name;
	this.meterRegistry = meterRegistry;

	this.snapshotTime = Timer
			.builder("db.snapshot.timer")
			.publishPercentiles(0.2, 0.5, 0.95)
			.publishPercentileHistogram()
			.tags("db.name", name)
			.register(meterRegistry);
	this.enableColumnsBug = "true".equals(databaseOptions.extraFlags().getOrDefault("enableColumnBug", "false"));
	if (!enableColumnsBug) {
		// Make sure the "default" column is part of the requested columns
		if (columns.stream().noneMatch(column -> column.name().equals("default"))) {
			columns = Stream.concat(Stream.of(Column.of("default")), columns.stream()).toList();
		}
	}

	OptionsWithCache optionsWithCache = openRocksDb(path, databaseOptions, refs);
	var rocksdbOptions = optionsWithCache.options();
	try {
		List<ColumnFamilyDescriptor> descriptors = new ArrayList<>();

		var defaultColumnOptions = new ColumnFamilyOptions();
		refs.track(defaultColumnOptions);
		descriptors.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, defaultColumnOptions));

		// Check column names validity
		for (NamedColumnOptions columnOption : databaseOptions.columnOptions()) {
			if (columns.stream().map(Column::name).noneMatch(columnName -> columnName.equals(columnOption.columnName()))) {
				throw new IllegalArgumentException(
						"Column " + columnOption.columnName() + " does not exist. Available columns: " + columns
								.stream()
								.map(Column::name)
								.collect(Collectors.joining(", ", "[", "]")));
			}
		}

		var rocksLogger = new RocksLog4jLogger(rocksdbOptions, logger);
		this.persistentCaches = new HashMap<>();

		for (Column column : columns) {
			var columnFamilyOptions = new ColumnFamilyOptions();
			refs.track(columnFamilyOptions);

			columnFamilyOptions
					.setForceConsistencyChecks(FORCE_COLUMN_FAMILY_CONSISTENCY_CHECKS)
					.setParanoidFileChecks(PARANOID_FILE_CHECKS);

			// Column-specific options if present, otherwise the database-wide defaults
			var columnOptions = databaseOptions
					.columnOptions()
					.stream()
					.filter(opts -> opts.columnName().equals(column.name()))
					.findFirst()
					.map(opts -> (ColumnOptions) opts)
					.orElse(databaseOptions.defaultColumnOptions());

			//noinspection ConstantConditions
			if (columnOptions.memtableMemoryBudgetBytes() != null) {
				// about 512MB of ram will be used for level style compaction
				columnFamilyOptions.optimizeLevelStyleCompaction(columnOptions.memtableMemoryBudgetBytes().orElse(
						databaseOptions.lowMemory()
								? (DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET / 4)
								: DEFAULT_COMPACTION_MEMTABLE_MEMORY_BUDGET));
			}

			if (isDisableAutoCompactions()) {
				columnFamilyOptions.setDisableAutoCompactions(true);
			}
			var blobFiles = columnOptions.blobFiles();
			columnFamilyOptions.setEnableBlobFiles(blobFiles);
			try {
				columnFamilyOptions.setPrepopulateBlobCache(PrepopulateBlobCache.PREPOPULATE_BLOB_FLUSH_ONLY);
			} catch (Throwable ex) {
				// Best effort: a failure here is logged and the open continues
				logger.error("Failed to set prepopulate blob cache", ex);
			}
			if (blobFiles) {
				if (columnOptions.blobFileSize().isPresent()) {
					columnFamilyOptions.setBlobFileSize(columnOptions.blobFileSize().get());
				}
				if (columnOptions.minBlobSize().isPresent()) {
					columnFamilyOptions.setMinBlobSize(columnOptions.minBlobSize().get());
				}
				if (columnOptions.blobCompressionType().isPresent()) {
					columnFamilyOptions.setCompressionType(columnOptions.blobCompressionType().get().getType());
				} else {
					columnFamilyOptions.setCompressionType(CompressionType.LZ4_COMPRESSION);
				}
				columnFamilyOptions.setBlobCompactionReadaheadSize(4 * SizeUnit.MB);
				columnFamilyOptions.setEnableBlobGarbageCollection(true);
			}

			// This option is not supported with multiple db paths
			// https://www.arangodb.com/docs/stable/programs-arangod-rocksdb.html
			// https://github.com/facebook/rocksdb/wiki/Tuning-RocksDB-on-Spinning-Disks
			boolean dynamicLevelBytes = databaseOptions.volumes().size() <= 1;
			if (dynamicLevelBytes) {
				columnFamilyOptions.setLevelCompactionDynamicLevelBytes(true);
			} else {
				columnFamilyOptions.setLevelCompactionDynamicLevelBytes(false);
				// https://www.arangodb.com/docs/stable/programs-arangod-rocksdb.html
				// https://nightlies.apache.org/flink/flink-docs-release-1.3/api/java/org/apache/flink/contrib/streaming/state/PredefinedOptions.html
				columnFamilyOptions.setMaxBytesForLevelBase(256 * SizeUnit.MB);
				// https://www.arangodb.com/docs/stable/programs-arangod-rocksdb.html
				columnFamilyOptions.setMaxBytesForLevelMultiplier(10);
			}
			if (isDisableAutoCompactions()) {
				columnFamilyOptions.setLevel0FileNumCompactionTrigger(-1);
			} else if (!FOLLOW_ROCKSDB_OPTIMIZATIONS) {
				// ArangoDB uses a value of 2: https://www.arangodb.com/docs/stable/programs-arangod-rocksdb.html
				// Higher values speed up writes, but slow down reads
				columnFamilyOptions.setLevel0FileNumCompactionTrigger(2);
			}
			if (isDisableSlowdown()) {
				columnFamilyOptions.setLevel0SlowdownWritesTrigger(-1);
				columnFamilyOptions.setLevel0StopWritesTrigger(Integer.MAX_VALUE);
				columnFamilyOptions.setHardPendingCompactionBytesLimit(Long.MAX_VALUE);
				columnFamilyOptions.setSoftPendingCompactionBytesLimit(Long.MAX_VALUE);
			} else {
				// Apply the regular triggers only when slowdown is NOT disabled. Without the "else" this block
				// always ran and overwrote the "disabled" level-0 triggers set in the branch above.
				// https://www.arangodb.com/docs/stable/programs-arangod-rocksdb.html
				columnFamilyOptions.setLevel0SlowdownWritesTrigger(20);
				// https://www.arangodb.com/docs/stable/programs-arangod-rocksdb.html
				columnFamilyOptions.setLevel0StopWritesTrigger(36);
			}

			if (!columnOptions.levels().isEmpty()) {
				columnFamilyOptions.setNumLevels(columnOptions.levels().size());

				var firstLevelOptions = getRocksLevelOptions(columnOptions.levels().get(0), refs);
				columnFamilyOptions.setCompressionType(firstLevelOptions.compressionType);
				columnFamilyOptions.setCompressionOptions(firstLevelOptions.compressionOptions);

				var lastLevelOptions = getRocksLevelOptions(columnOptions
						.levels()
						.get(columnOptions.levels().size() - 1), refs);
				columnFamilyOptions.setBottommostCompressionType(lastLevelOptions.compressionType);
				columnFamilyOptions.setBottommostCompressionOptions(lastLevelOptions.compressionOptions);

				columnFamilyOptions.setCompressionPerLevel(mapList(columnOptions.levels(), v -> v.compression().getType()));
			} else {
				// No explicit levels: 7 levels, the first two uncompressed, the others LZ4
				columnFamilyOptions.setNumLevels(7);
				List<CompressionType> compressionTypes = new ArrayList<>(7);
				for (int i = 0; i < 7; i++) {
					if (i < 2) {
						compressionTypes.add(CompressionType.NO_COMPRESSION);
					} else {
						compressionTypes.add(CompressionType.LZ4_COMPRESSION);
					}
				}
				columnFamilyOptions.setBottommostCompressionType(CompressionType.LZ4HC_COMPRESSION);
				var compressionOptions = new CompressionOptions()
						.setEnabled(true)
						.setMaxDictBytes(32768);
				refs.track(compressionOptions);
				columnFamilyOptions.setBottommostCompressionOptions(compressionOptions);
				columnFamilyOptions.setCompressionPerLevel(compressionTypes);
			}

			final TableFormatConfig tableOptions = inMemory ? new PlainTableConfig() : new BlockBasedTableConfig();
			if (!FOLLOW_ROCKSDB_OPTIMIZATIONS) {
				if (!databaseOptions.lowMemory()) {
					// tableOptions.setOptimizeFiltersForMemory(true);
					columnFamilyOptions.setWriteBufferSize(256 * SizeUnit.MB);
				}
			}
			if (columnOptions.writeBufferSize().isPresent()) {
				columnFamilyOptions.setWriteBufferSize(columnOptions.writeBufferSize().get());
			}
			columnFamilyOptions.setMaxWriteBufferNumberToMaintain(1);
			if (tableOptions instanceof BlockBasedTableConfig blockBasedTableConfig) {
				blockBasedTableConfig.setVerifyCompression(VERIFY_COMPRESSION);
			}
			if (columnOptions.filter().isPresent()) {
				var filterOptions = columnOptions.filter().get();

				if (filterOptions instanceof it.cavallium.dbengine.rpc.current.data.BloomFilter bloomFilterOptions) {
					// If OptimizeFiltersForHits == true: memory size = bitsPerKey * (totalKeys * 0.1)
					// If OptimizeFiltersForHits == false: memory size = bitsPerKey * totalKeys
					final BloomFilter bloomFilter = new BloomFilter(bloomFilterOptions.bitsPerKey());
					refs.track(bloomFilter);
					if (tableOptions instanceof BlockBasedTableConfig blockBasedTableConfig) {
						blockBasedTableConfig.setFilterPolicy(bloomFilter);
					}
				} else if (filterOptions instanceof NoFilter) {
					if (tableOptions instanceof BlockBasedTableConfig blockBasedTableConfig) {
						blockBasedTableConfig.setFilterPolicy(null);
					}
				}
			}
			boolean cacheIndexAndFilterBlocks = columnOptions.cacheIndexAndFilterBlocks()
					// https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters
					.orElse(true);
			if (databaseOptions.spinning()) {
				if (!FOLLOW_ROCKSDB_OPTIMIZATIONS) {
					// https://github.com/facebook/rocksdb/wiki/Tuning-RocksDB-on-Spinning-Disks
					// cacheIndexAndFilterBlocks = true;
					// https://nightlies.apache.org/flink/flink-docs-release-1.3/api/java/org/apache/flink/contrib/streaming/state/PredefinedOptions.html
					columnFamilyOptions.setMinWriteBufferNumberToMerge(3);
					// https://nightlies.apache.org/flink/flink-docs-release-1.3/api/java/org/apache/flink/contrib/streaming/state/PredefinedOptions.html
					columnFamilyOptions.setMaxWriteBufferNumber(4);
				}
			}
			if (tableOptions instanceof BlockBasedTableConfig blockBasedTableConfig) {
				blockBasedTableConfig
						// http://rocksdb.org/blog/2018/08/23/data-block-hash-index.html
						.setDataBlockIndexType(DataBlockIndexType.kDataBlockBinaryAndHash)
						// http://rocksdb.org/blog/2018/08/23/data-block-hash-index.html
						.setDataBlockHashTableUtilRatio(0.75)
						// https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters
						.setPinTopLevelIndexAndFilter(true)
						// https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters
						.setPinL0FilterAndIndexBlocksInCache(true)
						// https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters
						.setCacheIndexAndFilterBlocksWithHighPriority(true)
						.setCacheIndexAndFilterBlocks(cacheIndexAndFilterBlocks)
						// https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters
						// Enabling partition filters increase the reads by 2x
						.setPartitionFilters(columnOptions.partitionFilters().orElse(false))
						// https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters
						.setIndexType(columnOptions.partitionFilters().orElse(false) ? IndexType.kTwoLevelIndexSearch : IndexType.kBinarySearch)
						.setChecksumType(ChecksumType.kXXH3)
						.setVerifyCompression(VERIFY_COMPRESSION)
						// Spinning disks: 64KiB to 256KiB (also 512KiB). SSDs: 16KiB
						// https://github.com/facebook/rocksdb/wiki/Tuning-RocksDB-on-Spinning-Disks
						// https://nightlies.apache.org/flink/flink-docs-release-1.3/api/java/org/apache/flink/contrib/streaming/state/PredefinedOptions.html
						.setBlockSize(columnOptions.blockSize().orElse((databaseOptions.spinning() ? 128 : 16) * 1024))
						.setBlockCache(optionsWithCache.standardCache())
						.setPersistentCache(resolvePersistentCache(persistentCaches,
								rocksdbOptions,
								databaseOptions.persistentCaches(),
								columnOptions.persistentCacheId(),
								refs,
								rocksLogger
						));
			}

			columnFamilyOptions.setTableFormatConfig(tableOptions);
			if (inMemory) {
				columnFamilyOptions.useFixedLengthPrefixExtractor(3);
			}
			columnFamilyOptions.setCompactionPriority(CompactionPriority.MinOverlappingRatio);
			if (columnOptions.filter().isPresent()) {
				var filterOptions = columnOptions.filter().get();

				if (filterOptions instanceof it.cavallium.dbengine.rpc.current.data.BloomFilter bloomFilterOptions) {
					boolean optimizeForHits = bloomFilterOptions.optimizeForHits()
							// https://github.com/facebook/rocksdb/wiki/Tuning-RocksDB-on-Spinning-Disks
							// https://github.com/EighteenZi/rocksdb_wiki/blob/master/RocksDB-Tuning-Guide.md#throughput-gap-between-random-read-vs-sequential-read-is-much-higher-in-spinning-disks-suggestions=
							.orElse(databaseOptions.spinning());
					columnFamilyOptions.setOptimizeFiltersForHits(optimizeForHits);
				}
			}

			if (!FOLLOW_ROCKSDB_OPTIMIZATIONS) {
				// // Increasing this value can reduce the frequency of compaction and reduce write amplification,
				// // but it will also cause old data to be unable to be cleaned up in time, thus increasing read amplification.
				// // This parameter is not easy to adjust. It is generally not recommended to set it above 256MB.
				// https://nightlies.apache.org/flink/flink-docs-release-1.3/api/java/org/apache/flink/contrib/streaming/state/PredefinedOptions.html
				columnFamilyOptions.setTargetFileSizeBase(64 * SizeUnit.MB);
				// // For each level up, the threshold is multiplied by the factor target_file_size_multiplier
				// // (but the default value is 1, which means that the maximum sstable of each level is the same).
				columnFamilyOptions.setTargetFileSizeMultiplier(2);
			}

			descriptors.add(new ColumnFamilyDescriptor(column.name().getBytes(StandardCharsets.US_ASCII), columnFamilyOptions));
		}

		// Get databases directory path
		requireNonNull(path);
		Path databasesDirPath = path.toAbsolutePath().getParent();
		String dbPathString = databasesDirPath.toString() + File.separatorChar + path.getFileName();
		this.dbPath = Paths.get(dbPathString);

		// Set options
		this.databaseOptions = databaseOptions;

		var statsLevel = System.getProperty("it.cavallium.dbengine.stats.level");
		if (statsLevel != null) {
			this.statistics = registerStatistics(name, rocksdbOptions, meterRegistry, StatsLevel.valueOf(statsLevel));
		} else {
			this.statistics = null;
		}

		// Retry loop: the only recoverable failure is "direct I/O not supported", handled by disabling direct I/O
		while (true) {
			try {
				// a factory method that returns a RocksDB instance
				if (databaseOptions.openAsSecondary()) {
					var secondaryPath = dbPath
							.resolve("secondary-log")
							.resolve(databaseOptions.secondaryDirectoryName().orElse("unnamed-" + UUID.randomUUID()));
					try {
						Files.createDirectories(secondaryPath);
					} catch (IOException e) {
						throw new RocksDBException("Failed to create secondary exception: " + e);
					}
					this.db = RocksDB.openReadOnly(rocksdbOptions,
							dbPathString,
							descriptors,
							handles,
							false
					);
				} else if (databaseOptions.optimistic()) {
					this.db = OptimisticTransactionDB.open(rocksdbOptions, dbPathString, descriptors, handles);
				} else {
					var transactionOptions = new TransactionDBOptions()
							.setWritePolicy(TxnDBWritePolicy.WRITE_COMMITTED)
							.setTransactionLockTimeout(5000)
							.setDefaultLockTimeout(5000);
					refs.track(transactionOptions);
					this.db = TransactionDB.open(rocksdbOptions,
							transactionOptions,
							dbPathString,
							descriptors,
							handles
					);
				}
				this.standardCache = optionsWithCache.standardCache;
				break;
			} catch (RocksDBException ex) {
				switch (ex.getMessage()) {
					case "Direct I/O is not supported by the specified DB." -> {
						logger.warn(ex.getLocalizedMessage());
						rocksdbOptions
								.setUseDirectReads(false)
								.setUseDirectIoForFlushAndCompaction(false)
								.setAllowMmapReads(databaseOptions.allowMemoryMapping())
								.setAllowMmapWrites(databaseOptions.allowMemoryMapping());
					}
					default -> throw ex;
				}
			}
		}

		this.handles = new HashMap<>();
		if (enableColumnsBug && !inMemory) {
			// Legacy behavior: pair columns and handles by list index
			for (int i = 0; i < columns.size(); i++) {
				this.handles.put(columns.get(i), handles.get(i));
			}
		} else {
			// Pair each handle with the column that has the same name
			handles: for (ColumnFamilyHandle handle : handles) {
				for (Column column : columns) {
					if (Arrays.equals(column.name().getBytes(StandardCharsets.US_ASCII), handle.getName())) {
						this.handles.put(column, handle);
						continue handles;
					}
				}
			}
		}

		handles.forEach(refs::track);

		// compactDb(db, handles);
		if (!databaseOptions.openAsSecondary()) {
			logger.info("Flushing database at {}", dbPathString);
			flushDb(db, handles);
		}
	} catch (RocksDBException ex) {
		throw new DBException(ex);
	}

	// Statistics dump at trace level; a failure to read them is logged at debug level and ignored
	try {
		for (ColumnFamilyHandle cfh : handles) {
			var props = db.getProperty(cfh, "rocksdb.stats");
			logger.trace("Stats for database {}, column {}: {}",
					name,
					new String(cfh.getName(), StandardCharsets.UTF_8),
					props
			);
		}
	} catch (RocksDBException ex) {
		logger.debug("Failed to obtain stats", ex);
	}

	for (RocksDBLongProperty property : RocksDBLongProperty.values()) {
		registerGauge(meterRegistry, name, property.getName(), property.isDividedByColumnFamily());
	}
	// Bloom seek stats
	registerGauge(meterRegistry, name, "rocksdb.bloom.filter.prefix.useful", true);
	registerGauge(meterRegistry, name, "rocksdb.bloom.filter.prefix.checked", true);
	// Bloom point lookup stats
	registerGauge(meterRegistry, name, "rocksdb.bloom.filter.useful", true);
	registerGauge(meterRegistry, name, "rocksdb.bloom.filter.full.positive", true);
	registerGauge(meterRegistry, name, "rocksdb.bloom.filter.full.true.positive", true);
}
public static boolean isDisableAutoCompactions() {
2022-06-09 00:49:08 +02:00
return parseBoolean(System.getProperty("it.cavallium.dbengine.compactions.auto.disable", "false"));
2022-04-28 11:35:01 +02:00
}
public static boolean isDisableSlowdown() {
return isDisableAutoCompactions()
2022-06-09 00:49:08 +02:00
|| parseBoolean(System.getProperty("it.cavallium.dbengine.disableslowdown", "false"));
}
2022-04-30 14:21:20 +02:00
/**
 * Verifies that this database can still be used.
 *
 * @throws IllegalStateException if this database has been marked as closed
 */
protected void ensureOpen() {
	if (!closed) {
		// Delegate the remaining checks on the RocksDB instance to the shared helper
		RocksDBUtils.ensureOpen(db, null);
		return;
	}
	throw new IllegalStateException("Database closed");
}
protected void ensureOwned(AbstractImmutableNativeReference rocksObject) {
2022-05-12 19:14:27 +02:00
RocksDBUtils.ensureOwned(rocksObject);
}
private synchronized PersistentCache resolvePersistentCache(HashMap<String, PersistentCache> caches,
DBOptions rocksdbOptions,
List<it.cavallium.dbengine.rpc.current.data.PersistentCache> persistentCaches,
2022-05-22 16:48:08 +02:00
NullableString persistentCacheId,
2022-06-08 18:52:15 +02:00
RocksDBRefs refs,
RocksLog4jLogger rocksLogger) throws RocksDBException {
if (persistentCacheId.isEmpty()) {
return null;
}
var existingPersistentCache = caches.get(persistentCacheId.get());
if (existingPersistentCache != null) {
return existingPersistentCache;
}
var foundCaches = persistentCaches
.stream()
.filter(cache -> cache.id().equals(persistentCacheId.get()))
.toList();
if (foundCaches.size() > 1) {
throw new IllegalArgumentException("There are " + foundCaches.size()
+ " defined persistent caches with the id \"" + persistentCacheId.get() + "\"");
}
for (it.cavallium.dbengine.rpc.current.data.PersistentCache foundCache : foundCaches) {
var persistentCache = new PersistentCache(Env.getDefault(),
foundCache.path(),
foundCache.size(),
2022-06-08 18:52:15 +02:00
rocksLogger,
foundCache.optimizeForNvm()
);
2022-05-22 16:48:08 +02:00
refs.track(persistentCache);
var prev = caches.put(persistentCacheId.get(), persistentCache);
if (prev != null) {
throw new IllegalStateException();
}
return persistentCache;
}
throw new IllegalArgumentException("Persistent cache " + persistentCacheId.get() + " is not defined");
}
public Map<Column, ColumnFamilyHandle> getAllColumnFamilyHandles() {
2022-03-22 12:59:22 +01:00
return this.handles;
}
/**
 * Returns the index of the last volume path of this database.
 *
 * @return the number of converted volume paths minus one
 */
public int getLastVolumeId() {
	final var databasesDir = dbPath.toAbsolutePath().getParent();
	final var databaseDirName = dbPath.getFileName();
	return convertPaths(databasesDir, databaseDirName, databaseOptions.volumes()).size() - 1;
}
2022-04-30 02:14:44 +02:00
public int getLevels(Column column) {
2022-04-30 14:21:20 +02:00
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
var cfh = handles.get(column);
ensureOwned(cfh);
return RocksDBUtils.getLevels(db, cfh);
2022-04-30 14:21:20 +02:00
} finally {
closeLock.unlockRead(closeReadLock);
}
}
/**
 * Lists the live files of the database, one {@link RocksDBFile} per live file metadata entry.
 *
 * @return a stream over a list that is fully built before this method returns
 * @throws RocksDBException      if RocksDB fails while listing the live files
 * @throws IllegalStateException if the database is closed
 */
public Stream<RocksDBFile> getAllLiveFiles() throws RocksDBException {
	final long stamp = closeLock.readLock();
	try {
		ensureOpen();
		db.getLiveFiles(); // flushes the memtable
		var metadata = db.getLiveFilesMetaData();
		List<RocksDBFile> result = new ArrayList<>();
		for (LiveFileMetaData entry : metadata) {
			var columnHandle = getCfh(entry.columnFamilyName());
			result.add(new RocksDBColumnFile(db, columnHandle, entry));
		}
		return result.stream();
	} finally {
		closeLock.unlockRead(stamp);
	}
}
public List<RocksDBFile> getColumnFiles(Column column, boolean excludeLastLevel) {
2022-04-30 14:21:20 +02:00
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
var cfh = handles.get(column);
ensureOwned(cfh);
return RocksDBUtils.getColumnFiles(db, cfh, excludeLastLevel);
2022-04-30 14:21:20 +02:00
} finally {
closeLock.unlockRead(closeReadLock);
}
}
2023-02-28 23:10:31 +01:00
public void forceCompaction(int volumeId) {
2022-04-30 14:21:20 +02:00
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
for (var cfh : this.handles.values()) {
ensureOwned(cfh);
RocksDBUtils.forceCompaction(db, name, cfh, volumeId, logger);
2022-04-30 14:21:20 +02:00
}
2023-02-28 23:10:31 +01:00
} catch (RocksDBException e) {
throw new DBException("Failed to force compaction", e);
2022-04-30 14:21:20 +02:00
} finally {
closeLock.unlockRead(closeReadLock);
}
}
public void flush(FlushOptions flushOptions) throws RocksDBException {
2022-04-30 14:21:20 +02:00
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(flushOptions);
2022-06-21 22:52:42 +02:00
db.flush(flushOptions, List.copyOf(getAllColumnFamilyHandles().values()));
db.flushWal(true);
2022-04-30 14:21:20 +02:00
} finally {
closeLock.unlockRead(closeReadLock);
}
}
2022-06-21 22:52:42 +02:00
@Override
public void preClose() {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
try (var fo = new FlushOptions().setWaitForFlush(true)) {
flush(fo);
} catch (RocksDBException ex) {
throw new DBException(ex);
2022-06-21 22:52:42 +02:00
}
db.cancelAllBackgroundWork(true);
} finally {
closeLock.unlockRead(closeReadLock);
}
2022-06-21 22:52:42 +02:00
}
2022-08-15 23:07:17 +02:00
@Override
protected void onPauseForBackup() {
pauseWrites();
2022-08-15 23:07:17 +02:00
}
@Override
protected void onResumeAfterBackup() {
resumeWrites();
2022-08-15 23:07:17 +02:00
}
2022-11-22 17:36:31 +01:00
@Override
public void ingestSST(Column column, Stream<Path> files, boolean replaceExisting) {
2022-11-22 17:36:31 +01:00
var columnHandle = handles.get(column);
if (columnHandle == null) {
logger.warn("Column {} doesn't exist", column);
return;
2022-11-22 17:36:31 +01:00
}
collect(files, iterating(sst -> {
2022-11-22 17:36:31 +01:00
try (var opts = new IngestExternalFileOptions()) {
2022-11-22 18:44:45 +01:00
opts.setIngestBehind(!replaceExisting);
2022-11-22 17:36:31 +01:00
opts.setSnapshotConsistency(false);
opts.setAllowBlockingFlush(true);
opts.setMoveFiles(true);
db.ingestExternalFile(columnHandle, List.of(sst.toString()), opts);
} catch (RocksDBException ex) {
throw new DBException(new DBException("Failed to ingest SST file " + sst, ex));
2022-11-22 17:36:31 +01:00
}
}));
2022-11-22 17:36:31 +01:00
}
2022-03-22 11:50:30 +01:00
/**
 * Compression type and compression options of a single level, as returned by
 * {@code getRocksLevelOptions}.
 */
private record RocksLevelOptions(CompressionType compressionType, CompressionOptions compressionOptions) {}
2022-05-22 16:48:08 +02:00
private RocksLevelOptions getRocksLevelOptions(DatabaseLevel levelOptions, RocksDBRefs refs) {
2022-03-22 11:50:30 +01:00
var compressionType = levelOptions.compression().getType();
var compressionOptions = new CompressionOptions();
2022-05-22 16:48:08 +02:00
refs.track(compressionOptions);
2022-03-22 11:50:30 +01:00
if (compressionType != CompressionType.NO_COMPRESSION) {
compressionOptions.setEnabled(true);
compressionOptions.setMaxDictBytes(levelOptions.maxDictBytes());
} else {
compressionOptions.setEnabled(false);
}
return new RocksLevelOptions(compressionType, compressionOptions);
}
private void registerGauge(MeterRegistry meterRegistry, String name, String propertyName, boolean divideByAllColumns) {
2022-05-04 01:21:56 +02:00
if (divideByAllColumns) {
2022-05-12 19:14:27 +02:00
for (var cfhEntry : handles.entrySet()) {
2022-05-04 01:21:56 +02:00
var columnName = cfhEntry.getKey().name();
var cfh = cfhEntry.getValue();
meterRegistry.gauge("rocksdb.property.value",
List.of(Tag.of("db.name", name), Tag.of("db.column.name", columnName), Tag.of("db.property.name", propertyName)),
db,
database -> {
if (closed) {
return 0d;
}
var closeReadLock = closeLock.readLock();
try {
if (closed) {
return 0d;
}
return database.getLongProperty(cfh, propertyName);
2022-05-04 01:21:56 +02:00
} catch (RocksDBException e) {
if ("NotFound".equals(e.getMessage())) {
return 0d;
}
throw new RuntimeException(e);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
);
}
} else {
meterRegistry.gauge("rocksdb.property.value",
List.of(Tag.of("db.name", name), Tag.of("db.property.name", propertyName)),
db,
database -> {
2022-04-30 14:21:20 +02:00
if (closed) {
return 0d;
}
2022-05-04 01:21:56 +02:00
var closeReadLock = closeLock.readLock();
try {
if (closed) {
return 0d;
}
2022-05-04 12:36:32 +02:00
return database.getAggregatedLongProperty(propertyName) / (double) handles.size();
2022-05-04 01:21:56 +02:00
} catch (RocksDBException e) {
if ("NotFound".equals(e.getMessage())) {
return 0d;
}
throw new RuntimeException(e);
} finally {
closeLock.unlockRead(closeReadLock);
2022-04-07 22:19:11 +02:00
}
2022-01-15 20:00:10 +01:00
}
2022-05-04 01:21:56 +02:00
);
}
2020-12-07 22:15:18 +01:00
}
/**
 * Returns the name of this database.
 */
@Override
public String getDatabaseName() {
    return this.name;
}
/**
 * Returns the lock that guards closing: methods of this class hold it in read mode while
 * they use the database, and {@code flushAndCloseDb} holds it in write mode.
 */
public StampedLock getCloseLock() {
    return this.closeLock;
}
2023-03-25 13:42:49 +01:00
private void flushAndCloseDb(RocksDB db, Cache standardCache, List<ColumnFamilyHandle> handles) {
var closeWriteLock = closeLock.writeLock();
try {
if (closed) {
return;
}
closed = true;
if (db.isOwningHandle()) {
//flushDb(db, handles);
}
2020-12-07 22:15:18 +01:00
snapshotsHandles.forEach((id, snapshot) -> {
try {
2022-04-20 23:29:39 +02:00
if (db.isOwningHandle()) {
db.releaseSnapshot(snapshot);
2022-05-12 19:14:27 +02:00
snapshot.close();
}
} catch (Exception ex2) {
// ignore exception
logger.debug("Failed to release snapshot " + id, ex2);
}
});
snapshotsHandles.clear();
2021-07-06 22:27:03 +02:00
try {
db.closeE();
2021-07-06 22:27:03 +02:00
} catch (Exception ex) {
logger.error("Can't close database " + name + " at " + dbPath, ex);
2021-07-06 22:27:03 +02:00
}
2022-05-01 15:35:12 +02:00
for (ColumnFamilyHandle handle : handles) {
try {
handle.close();
} catch (Exception ex) {
logger.error("Can't close column family", ex);
}
}
if (standardCache != null) {
standardCache.close();
}
for (PersistentCache persistentCache : persistentCaches.values()) {
try {
persistentCache.close();
} catch (Exception ex) {
logger.error("Can't close persistent cache", ex);
}
2021-07-01 21:19:52 +02:00
}
2022-05-22 16:48:08 +02:00
refs.close();
} finally {
closeLock.unlockWrite(closeWriteLock);
2022-04-11 20:04:27 +02:00
}
2020-12-07 22:15:18 +01:00
}
2021-10-20 01:51:34 +02:00
private void flushDb(RocksDB db, List<ColumnFamilyHandle> handles) throws RocksDBException {
if (LLUtils.isInNonBlockingThread()) {
logger.error("Called flushDb in a nonblocking thread");
}
2020-12-07 22:15:18 +01:00
// force flush the database
2021-07-06 22:27:03 +02:00
try (var flushOptions = new FlushOptions().setWaitForFlush(true).setAllowWriteStall(true)) {
db.flush(flushOptions);
2020-12-07 22:15:18 +01:00
}
2021-07-06 22:27:03 +02:00
try (var flushOptions = new FlushOptions().setWaitForFlush(true).setAllowWriteStall(true)) {
db.flush(flushOptions, handles);
}
db.flushWal(true);
db.syncWal();
2020-12-07 22:15:18 +01:00
// end force flush
}
2021-04-03 19:09:06 +02:00
@SuppressWarnings("unused")
2021-10-20 01:51:34 +02:00
private void compactDb(TransactionDB db, List<ColumnFamilyHandle> handles) {
if (LLUtils.isInNonBlockingThread()) {
logger.error("Called compactDb in a nonblocking thread");
}
2021-03-19 20:55:38 +01:00
// force compact the database
for (ColumnFamilyHandle cfh : handles) {
var t = new Thread(() -> {
2021-03-21 13:06:54 +01:00
int r = ThreadLocalRandom.current().nextInt();
var s = StopWatch.createStarted();
2021-03-19 20:55:38 +01:00
try {
// Range rangeToCompact = db.suggestCompactRange(cfh);
2021-03-21 13:06:54 +01:00
logger.info("Compacting range {}", r);
2022-05-11 00:29:42 +02:00
try (var cro = new CompactRangeOptions()
2021-03-21 13:06:54 +01:00
.setAllowWriteStall(true)
.setExclusiveManualCompaction(true)
2022-05-11 00:29:42 +02:00
.setChangeLevel(false)) {
db.compactRange(cfh, null, null, cro);
}
2021-03-19 20:55:38 +01:00
} catch (RocksDBException e) {
if ("Database shutdown".equalsIgnoreCase(e.getMessage())) {
logger.warn("Compaction cancelled: database shutdown");
} else {
logger.warn("Failed to compact range", e);
}
}
2021-03-21 13:06:54 +01:00
logger.info("Compacted range {} in {} milliseconds", r, s.getTime(TimeUnit.MILLISECONDS));
2021-03-19 20:55:38 +01:00
}, "Compaction");
t.setDaemon(true);
t.start();
}
// end force compact
}
2022-12-21 01:05:08 +01:00
2023-03-25 13:42:49 +01:00
record OptionsWithCache(DBOptions options, @Nullable Cache standardCache) {
2022-12-21 01:05:08 +01:00
}
2022-03-10 02:38:57 +01:00
2023-02-22 16:59:35 +01:00
private static OptionsWithCache openRocksDb(@Nullable Path path, DatabaseOptions databaseOptions, RocksDBRefs refs) {
try {
// Get databases directory path
Path databasesDirPath;
if (path != null) {
databasesDirPath = path.toAbsolutePath().getParent();
// Create base directories
if (Files.notExists(databasesDirPath)) {
Files.createDirectories(databasesDirPath);
}
} else {
databasesDirPath = null;
2021-07-10 20:52:01 +02:00
}
2023-02-22 16:59:35 +01:00
//noinspection ConstantConditions
if (databaseOptions.persistentCaches() != null) {
for (var persistentCache : databaseOptions.persistentCaches()) {
var persistentCachePath = Paths.get(persistentCache.path());
if (Files.notExists(persistentCachePath)) {
Files.createDirectories(persistentCachePath);
if (!Files.isDirectory(persistentCachePath)) {
throw new IllegalArgumentException(
"Persistent cache \"" + persistentCache.id() + "\" path \"" + persistentCachePath
+ "\" is not a directory!");
}
2022-06-09 16:47:42 +02:00
}
}
}
2020-12-07 22:15:18 +01:00
2023-02-22 16:59:35 +01:00
// the Options class contains a set of configurable DB options
// that determines the behaviour of the database.
var options = new DBOptions();
refs.track(options);
2023-09-03 01:33:58 +02:00
options.setParanoidChecks(PARANOID_CHECKS);
2023-09-26 01:45:28 +02:00
options.setSkipCheckingSstFileSizesOnDbOpen(!VERIFY_FILE_SIZE);
2023-02-22 16:59:35 +01:00
options.setEnablePipelinedWrite(true);
var maxSubCompactions = Integer.parseInt(System.getProperty("it.cavallium.dbengine.compactions.max.sub", "-1"));
if (maxSubCompactions > 0) {
2023-02-22 16:59:35 +01:00
options.setMaxSubcompactions(maxSubCompactions);
}
var customWriteRate = Long.parseLong(System.getProperty("it.cavallium.dbengine.write.delayedrate", "-1"));
if (customWriteRate >= 0) {
options.setDelayedWriteRate(customWriteRate);
}
if (databaseOptions.logPath().isPresent()) {
options.setDbLogDir(databaseOptions.logPath().get());
}
if (databaseOptions.walPath().isPresent()) {
options.setWalDir(databaseOptions.walPath().get());
}
options.setCreateIfMissing(true);
options.setSkipStatsUpdateOnDbOpen(true);
options.setCreateMissingColumnFamilies(true);
2023-09-26 02:43:06 +02:00
options.setInfoLogLevel(LOG_LEVEL);
2023-07-25 17:29:36 +02:00
// todo: automatically flush every x seconds?
options.setManualWalFlush(true);
2023-02-22 16:59:35 +01:00
options.setAvoidFlushDuringShutdown(false); // Flush all WALs during shutdown
options.setAvoidFlushDuringRecovery(true); // Flush all WALs during startup
options.setWalRecoveryMode(databaseOptions.absoluteConsistency()
? WALRecoveryMode.AbsoluteConsistency
: WALRecoveryMode.PointInTimeRecovery); // Crash if the WALs are corrupted.Default: TolerateCorruptedTailRecords
options.setDeleteObsoleteFilesPeriodMicros(20 * 1000000); // 20 seconds
options.setKeepLogFileNum(10);
requireNonNull(databasesDirPath);
requireNonNull(path.getFileName());
List<DbPath> paths = mapList(convertPaths(databasesDirPath, path.getFileName(), databaseOptions.volumes()),
p -> new DbPath(p.path, p.targetSize)
);
2023-02-22 16:59:35 +01:00
options.setDbPaths(paths);
options.setMaxOpenFiles(databaseOptions.maxOpenFiles().orElse(-1));
2023-09-03 01:14:18 +02:00
options.setMaxFileOpeningThreads(Runtime.getRuntime().availableProcessors());
2023-02-22 16:59:35 +01:00
if (databaseOptions.spinning()) {
// https://nightlies.apache.org/flink/flink-docs-release-1.3/api/java/org/apache/flink/contrib/streaming/state/PredefinedOptions.html
options.setUseFsync(false);
}
2022-04-11 16:53:17 +02:00
2023-02-22 16:59:35 +01:00
long writeBufferManagerSize;
if (databaseOptions.writeBufferManager().isPresent()) {
writeBufferManagerSize = databaseOptions.writeBufferManager().get();
} else {
writeBufferManagerSize = 0;
2022-05-02 18:48:44 +02:00
}
2023-02-22 16:59:35 +01:00
if (isDisableAutoCompactions()) {
options.setMaxBackgroundCompactions(0);
options.setMaxBackgroundJobs(0);
2022-04-11 16:53:17 +02:00
} else {
2023-02-22 16:59:35 +01:00
var backgroundJobs = Integer.parseInt(System.getProperty("it.cavallium.dbengine.jobs.background.num", "-1"));
if (backgroundJobs >= 0) {
options.setMaxBackgroundJobs(backgroundJobs);
}
2022-04-11 16:53:17 +02:00
}
2021-07-17 11:52:08 +02:00
2023-02-22 16:59:35 +01:00
Cache blockCache;
final boolean useDirectIO = databaseOptions.useDirectIO();
final boolean allowMmapReads = !useDirectIO && databaseOptions.allowMemoryMapping();
final boolean allowMmapWrites = !useDirectIO && (databaseOptions.allowMemoryMapping()
|| parseBoolean(System.getProperty("it.cavallium.dbengine.mmapwrites.enable", "false")));
// todo: replace with a real option called database-write-buffer-size
// 0 = default = disabled
long dbWriteBufferSize = Long.parseLong(System.getProperty("it.cavallium.dbengine.dbwritebuffer.size", "0"));
2023-02-22 16:59:35 +01:00
if (databaseOptions.lowMemory()) {
// LOW MEMORY
2021-07-17 11:52:08 +02:00
options
2023-02-22 16:59:35 +01:00
.setBytesPerSync(0) // default
.setWalBytesPerSync(0) // default
.setIncreaseParallelism(1)
.setDbWriteBufferSize(Math.min(dbWriteBufferSize, 8 * SizeUnit.MB))
2023-02-22 16:59:35 +01:00
.setWalTtlSeconds(60)
.setMaxTotalWalSize(10 * SizeUnit.GB)
2021-07-17 11:52:08 +02:00
;
2023-02-22 16:59:35 +01:00
blockCache = CACHE_FACTORY.newCache(writeBufferManagerSize + databaseOptions.blockCache().orElse(8L * SizeUnit.MB));
refs.track(blockCache);
if (useDirectIO) {
options
// Option to enable readahead in compaction
// If not set, it will be set to 2MB internally
.setCompactionReadaheadSize(2 * SizeUnit.MB) // recommend at least 2MB
// Option to tune write buffer for direct writes
.setWritableFileMaxBufferSize(SizeUnit.MB)
;
}
if (databaseOptions.spinning()) {
options
// method documentation
.setCompactionReadaheadSize(4 * SizeUnit.MB)
// guessed
.setWritableFileMaxBufferSize(2 * SizeUnit.MB);
}
} else {
// HIGH MEMORY
2022-05-29 23:48:40 +02:00
options
.setDbWriteBufferSize(dbWriteBufferSize)
2023-02-22 16:59:35 +01:00
.setBytesPerSync(64 * SizeUnit.MB)
.setWalBytesPerSync(64 * SizeUnit.MB)
2023-07-25 17:29:36 +02:00
.setWalTtlSeconds(80) // Auto
2023-02-22 16:59:35 +01:00
.setWalSizeLimitMB(0) // Auto
2023-07-25 17:29:36 +02:00
.setMaxTotalWalSize(0) // AUto
2023-02-22 16:59:35 +01:00
;
blockCache = CACHE_FACTORY.newCache(writeBufferManagerSize + databaseOptions.blockCache().orElse( 512 * SizeUnit.MB));
refs.track(blockCache);
if (useDirectIO) {
options
// Option to enable readahead in compaction
// If not set, it will be set to 2MB internally
.setCompactionReadaheadSize(4 * SizeUnit.MB) // recommend at least 2MB
// Option to tune write buffer for direct writes
.setWritableFileMaxBufferSize(2 * SizeUnit.MB)
;
}
if (databaseOptions.spinning()) {
options
// method documentation
.setCompactionReadaheadSize(16 * SizeUnit.MB)
// guessed
.setWritableFileMaxBufferSize(8 * SizeUnit.MB);
}
options.setIncreaseParallelism(Runtime.getRuntime().availableProcessors());
2022-05-29 23:48:40 +02:00
}
2023-02-22 16:59:35 +01:00
if (databaseOptions.writeBufferManager().isPresent()) {
var writeBufferManager = new WriteBufferManager(writeBufferManagerSize, blockCache, false);
refs.track(writeBufferManager);
options.setWriteBufferManager(writeBufferManager);
2022-04-11 16:53:17 +02:00
}
2021-06-25 23:47:53 +02:00
2022-06-09 00:49:08 +02:00
if (useDirectIO) {
2021-06-25 23:47:53 +02:00
options
2023-02-22 16:59:35 +01:00
.setAllowMmapReads(false)
.setAllowMmapWrites(false)
.setUseDirectReads(true)
2021-06-25 23:47:53 +02:00
;
2023-02-22 16:59:35 +01:00
} else {
2022-05-29 23:48:40 +02:00
options
2023-02-22 16:59:35 +01:00
.setAllowMmapReads(allowMmapReads)
.setAllowMmapWrites(allowMmapWrites);
2022-05-29 23:48:40 +02:00
}
2021-12-27 16:33:31 +01:00
2023-02-22 16:59:35 +01:00
if (useDirectIO || !allowMmapWrites) {
options.setUseDirectIoForFlushAndCompaction(true);
}
2021-07-17 11:52:08 +02:00
2023-03-25 13:42:49 +01:00
return new OptionsWithCache(options, blockCache);
2023-02-22 16:59:35 +01:00
} catch (IOException e) {
throw new DBException(e);
2021-07-17 11:52:08 +02:00
}
2020-12-07 22:15:18 +01:00
}
/**
 * A directory used to store database files and its target size, as produced by
 * {@code convertPaths}. For configured volumes the target size is expressed in bytes.
 */
record DbPathRecord(Path path, long targetSize) {}
private static List<DbPathRecord> convertPaths(Path databasesDirPath, Path path, List<DatabaseVolume> volumes) {
var paths = new ArrayList<DbPathRecord>(volumes.size());
2021-12-27 16:33:31 +01:00
if (volumes.isEmpty()) {
return List.of(new DbPathRecord(databasesDirPath.resolve(path.getFileName() + "_hot"),
2022-04-15 16:49:01 +02:00
0), // Legacy
new DbPathRecord(databasesDirPath.resolve(path.getFileName() + "_cold"),
2022-04-07 20:03:29 +02:00
0), // Legacy
new DbPathRecord(databasesDirPath.resolve(path.getFileName() + "_colder"),
2022-04-15 16:49:01 +02:00
1000L * 1024L * 1024L * 1024L) // 1000GiB
2022-04-07 20:03:29 +02:00
); // Legacy
2021-12-27 16:33:31 +01:00
}
for (DatabaseVolume volume : volumes) {
Path volumePath;
if (volume.volumePath().isAbsolute()) {
volumePath = volume.volumePath();
} else {
volumePath = databasesDirPath.resolve(volume.volumePath());
}
paths.add(new DbPathRecord(volumePath, volume.targetSizeBytes()));
2021-12-27 16:33:31 +01:00
}
return paths;
}
2022-05-09 22:08:54 +02:00
private Statistics registerStatistics(String dbName, DBOptions dbOptions, MeterRegistry meterRegistry,
StatsLevel statsLevel) {
Statistics stats = new Statistics();
stats.setStatsLevel(statsLevel);
dbOptions.setStatistics(stats);
for (TickerType tickerType : TickerType.values()) {
if (tickerType == TickerType.TICKER_ENUM_MAX) {
continue;
}
meterRegistry.gauge("rocksdb.statistics.value",
List.of(Tag.of("db.name", dbName), Tag.of("db.statistics.name", tickerType.name())),
stats,
statistics -> {
2022-06-08 18:52:15 +02:00
if (closeRequested || closed) return 0d;
long closeReadLock = 0;
2022-05-09 22:08:54 +02:00
try {
2022-06-08 18:52:15 +02:00
closeReadLock = closeLock.tryReadLock(1, TimeUnit.SECONDS);
} catch (InterruptedException ignored) {}
try {
if (closeRequested || closed || closeReadLock == 0) return 0d;
2022-05-09 22:08:54 +02:00
return statistics.getTickerCount(tickerType);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
);
}
return stats;
}
private Snapshot getSnapshotLambda(LLSnapshot snapshot) {
2022-04-30 14:21:20 +02:00
var closeReadSnapLock = closeLock.readLock();
try {
ensureOpen();
var snapshotHandle = snapshotsHandles.get(snapshot.getSequenceNumber());
2022-06-20 00:32:56 +02:00
//ensureOwned(snapshotHandle);
2022-04-30 14:21:20 +02:00
return snapshotHandle;
} finally {
closeLock.unlockRead(closeReadSnapLock);
}
}
2020-12-07 22:15:18 +01:00
@Override
public LLLocalSingleton getSingleton(byte[] singletonListColumnName,
2022-03-20 14:33:27 +01:00
byte[] name,
byte @Nullable[] defaultValue) {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
var cfh = getCfh(singletonListColumnName);
ensureOwned(cfh);
return new LLLocalSingleton(getRocksDBColumn(db, cfh),
this::getSnapshotLambda,
LLLocalKeyValueDatabase.this.name,
name,
ColumnUtils.toString(singletonListColumnName),
defaultValue
);
} catch (RocksDBException ex) {
throw new DBException("Failed to read " + Arrays.toString(name), ex);
} finally {
closeLock.unlockRead(closeReadLock);
}
2020-12-07 22:15:18 +01:00
}
@Override
public LLLocalDictionary getDictionary(byte[] columnName, UpdateMode updateMode) {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
var cfh = getCfh(columnName);
ensureOwned(cfh);
return new LLLocalDictionary(getRocksDBColumn(db, cfh),
name,
ColumnUtils.toString(columnName),
this::getSnapshotLambda,
2023-07-22 00:56:43 +02:00
updateMode
);
} catch (RocksDBException e) {
throw new DBException(e);
} finally {
closeLock.unlockRead(closeReadLock);
}
2020-12-07 22:15:18 +01:00
}
2022-03-22 12:59:22 +01:00
public RocksDBColumn getRocksDBColumn(byte[] columnName) {
2022-04-30 14:21:20 +02:00
var closeReadLock = closeLock.readLock();
2022-03-22 12:59:22 +01:00
try {
2022-04-30 14:21:20 +02:00
ensureOpen();
ColumnFamilyHandle cfh;
2022-04-30 14:21:20 +02:00
try {
cfh = getCfh(columnName);
ensureOwned(cfh);
} catch (RocksDBException e) {
throw new UnsupportedOperationException("Column family doesn't exist: " + Arrays.toString(columnName), e);
}
return getRocksDBColumn(db, cfh);
} finally {
closeLock.unlockRead(closeReadLock);
2022-03-22 12:59:22 +01:00
}
}
private RocksDBColumn getRocksDBColumn(RocksDB db, ColumnFamilyHandle cfh) {
var closeLock = getCloseLock();
2021-10-20 01:51:34 +02:00
if (db instanceof OptimisticTransactionDB optimisticTransactionDB) {
return new OptimisticRocksDBColumn(optimisticTransactionDB,
name,
cfh,
meterRegistry,
closeLock
);
2021-12-27 18:44:54 +01:00
} else if (db instanceof TransactionDB transactionDB) {
return new PessimisticRocksDBColumn(transactionDB,
name,
cfh,
meterRegistry,
closeLock
);
2021-10-20 01:51:34 +02:00
} else {
return new StandardRocksDBColumn(db, name, cfh, meterRegistry, closeLock);
2021-10-20 01:51:34 +02:00
}
}
private ColumnFamilyHandle getCfh(byte[] columnName) throws RocksDBException {
2022-05-12 19:14:27 +02:00
var cfh = handles.get(ColumnUtils.special(ColumnUtils.toString(columnName)));
assert enableColumnsBug || Arrays.equals(cfh.getName(), columnName);
2021-06-19 16:26:54 +02:00
return cfh;
}
2021-06-27 15:40:56 +02:00
/**
 * Returns the options this database was configured with.
 */
public DatabaseOptions getDatabaseOptions() {
    return this.databaseOptions;
}
/**
 * Returns the paths of the live ".sst" files of this database.
 *
 * <p>The list of live files is read while holding the close lock in read mode. Each file is
 * then located lazily, when the returned stream is consumed: first inside the database path,
 * then inside each volume path, in order. Files found nowhere are skipped.
 *
 * @throws DBException if the list of live files can't be read
 */
public Stream<Path> getSSTS() {
    var volumePaths = convertPaths(dbPath.toAbsolutePath().getParent(), dbPath.getFileName(), databaseOptions.volumes());
    // Directories to probe, in priority order
    final List<Path> candidateDirs = new ArrayList<>(volumePaths.size() + 1);
    candidateDirs.add(dbPath);
    for (var volumePath : volumePaths) {
        candidateDirs.add(volumePath.path());
    }
    final long stamp = closeLock.readLock();
    try {
        ensureOpen();
        return db.getLiveFiles().files.stream()
                .filter(file -> file.endsWith(".sst"))
                // Drop the first character of the reported name before resolving it
                .map(file -> file.substring(1))
                .mapMulti((file, sink) -> {
                    for (Path candidateDir : candidateDirs) {
                        var candidate = candidateDir.resolve(file);
                        if (Files.exists(candidate)) {
                            sink.accept(candidate);
                            return;
                        }
                    }
                });
    } catch (RocksDBException e) {
        throw new DBException(e);
    } finally {
        closeLock.unlockRead(stamp);
    }
}
/**
 * Ingests the given SST files one at a time, using default ingestion options.
 *
 * <p>Each file is ingested while holding the close lock in read mode. A file that RocksDB
 * refuses is logged as an error and skipped; the remaining files are still processed.
 *
 * @param sstsFlux paths of the SST files to ingest
 */
public void ingestSSTS(Stream<Path> sstsFlux) {
    final Stream<String> absoluteSstPaths = sstsFlux.map(path -> path.toAbsolutePath().toString());
    collect(absoluteSstPaths, iterating(sst -> {
        final long stamp = closeLock.readLock();
        try (var ingestOptions = new IngestExternalFileOptions()) {
            logger.info("Ingesting SST \"{}\"...", sst);
            db.ingestExternalFile(List.of(sst), ingestOptions);
            logger.info("Ingested SST \"{}\" successfully", sst);
        } catch (RocksDBException e) {
            logger.error("Can't ingest SST \"{}\"", sst, e);
        } finally {
            closeLock.unlockRead(stamp);
        }
    }));
}
2022-01-15 20:00:10 +01:00
@Override
public MemoryStats getMemoryStats() {
if (closeRequested || closed) return null;
long closeReadLock = 0;
try {
//noinspection BlockingMethodInNonBlockingContext
closeReadLock = closeLock.tryReadLock(1, TimeUnit.SECONDS);
} catch (InterruptedException ignored) {}
try {
if (closeRequested || closed || closeReadLock == 0) return null;
ensureOpen();
2023-06-30 23:15:09 +02:00
return new MemoryStats(db.getAggregatedLongProperty(RocksDBLongProperty.ESTIMATE_TABLE_READERS_MEM.getName()),
db.getAggregatedLongProperty(RocksDBLongProperty.SIZE_ALL_MEM_TABLES.getName()),
db.getAggregatedLongProperty(RocksDBLongProperty.CUR_SIZE_ALL_MEM_TABLES.getName()),
db.getAggregatedLongProperty(RocksDBLongProperty.ESTIMATE_NUM_KEYS.getName()),
db.getAggregatedLongProperty(RocksDBLongProperty.BLOCK_CACHE_USAGE.getName()) / this.handles.size(),
db.getAggregatedLongProperty(RocksDBLongProperty.BLOCK_CACHE_PINNED_USAGE.getName()) / this.handles.size(),
db.getAggregatedLongProperty(RocksDBLongProperty.NUM_LIVE_VERSIONS.getName()) / this.handles.size()
);
} catch (RocksDBException e) {
throw new DBException("Failed to read memory stats", e);
} finally {
closeLock.unlockRead(closeReadLock);
}
2022-04-09 02:45:42 +02:00
}
2022-05-04 01:21:56 +02:00
@Override
public Map<String, String> getMapProperty(@Nullable Column column, RocksDBMapProperty property) {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
Map<String, String> result;
if (column == null) {
result = db.getMapProperty(property.getName());
} else {
var cfh = requireNonNull(handles.get(column));
result = db.getMapProperty(cfh, property.getName());
}
return result;
} catch (RocksDBException e) {
if (isEmpty(e)) return null;
throw new DBException("Failed to read property " + property.name(), e);
} finally {
closeLock.unlockRead(closeReadLock);
}
2022-05-04 01:21:56 +02:00
}
private boolean isEmpty(RocksDBException ex) {
return "NotFound".equals(ex.getMessage());
2022-05-04 01:21:56 +02:00
}
@Override
public Stream<ColumnProperty<Map<String, String>>> getMapColumnProperties(RocksDBMapProperty property) {
2023-02-22 16:59:35 +01:00
return getAllColumnFamilyHandles().keySet().stream().map(c -> new ColumnProperty<>(c.name(), property.getName(), this.getMapProperty(c, property)));
2022-05-04 01:21:56 +02:00
}
@Override
public String getStringProperty(@Nullable Column column, RocksDBStringProperty property) {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
if (column == null) {
return db.getProperty(property.getName());
} else {
var cfh = requireNonNull(handles.get(column));
return db.getProperty(cfh, property.getName());
}
} catch (RocksDBException e) {
if (isEmpty(e)) return null;
throw new DBException("Failed to read property " + property.name(), e);
} finally {
closeLock.unlockRead(closeReadLock);
}
2022-05-04 01:21:56 +02:00
}
@Override
public Stream<ColumnProperty<String>> getStringColumnProperties(RocksDBStringProperty property) {
return getAllColumnFamilyHandles().keySet().stream().map(c -> {
2023-02-22 16:59:35 +01:00
return new ColumnProperty<>(c.name(), property.getName(), this.getStringProperty(c, property));
});
2022-05-04 01:21:56 +02:00
}
@Override
public Long getLongProperty(@Nullable Column column, RocksDBLongProperty property) {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
if (column == null) {
return db.getLongProperty(property.getName());
} else {
var cfh = requireNonNull(handles.get(column));
return db.getLongProperty(cfh, property.getName());
}
} catch (RocksDBException e) {
if (isEmpty(e)) return null;
throw new DBException("Failed to read property " + property.name(), e);
} finally {
closeLock.unlockRead(closeReadLock);
}
2022-05-04 01:21:56 +02:00
}
@Override
public Stream<ColumnProperty<Long>> getLongColumnProperties(RocksDBLongProperty property) {
return getAllColumnFamilyHandles().keySet().stream().map(c -> {
2023-02-22 16:59:35 +01:00
return new ColumnProperty<>(c.name(), property.getName(), this.getLongProperty(c, property));
2022-05-04 01:21:56 +02:00
});
}
2022-04-09 02:45:42 +02:00
@Override
public Long getAggregatedLongProperty(RocksDBLongProperty property) {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
return db.getAggregatedLongProperty(property.getName());
} catch (RocksDBException e) {
if (isEmpty(e)) return null;
throw new DBException("Failed to read property " + property.name(), e);
} finally {
closeLock.unlockRead(closeReadLock);
}
2022-04-09 02:45:42 +02:00
}
@Override
public String getRocksDBStats() {
if (closeRequested || closed) return null;
long closeReadLock = 0;
try {
closeReadLock = closeLock.tryReadLock(1, TimeUnit.SECONDS);
} catch (InterruptedException ignored) {}
try {
if (closeRequested || closed || closeReadLock == 0) return null;
ensureOpen();
StringBuilder aggregatedStats = new StringBuilder();
for (var entry : this.handles.entrySet()) {
aggregatedStats
.append(entry.getKey().name())
.append("\n")
.append(db.getProperty(entry.getValue(), "rocksdb.stats"))
.append("\n");
}
return aggregatedStats.toString();
} catch (RocksDBException e) {
throw new DBException("Failed to read stats", e);
} finally {
closeLock.unlockRead(closeReadLock);
}
2022-01-15 20:00:10 +01:00
}
2021-06-27 15:06:48 +02:00
@Override
public Stream<TableWithProperties> getTableProperties() {
return handles.entrySet().stream().flatMap(handle -> {
if (closeRequested || closed) {
return null;
}
long closeReadLock = 0;
try {
closeReadLock = closeLock.tryReadLock(1, TimeUnit.SECONDS);
} catch (InterruptedException ignored) {
}
try {
if (closeRequested || closed || closeReadLock == 0) {
2021-06-27 15:06:48 +02:00
return null;
}
ensureOpen();
return db
.getPropertiesOfAllTables(handle.getValue())
.entrySet()
.stream()
.map(entry -> new TableWithProperties(handle.getKey().name(), entry.getKey(), entry.getValue()));
} catch (RocksDBException e) {
throw new CompletionException(new DBException("Failed to read stats", e));
} finally {
closeLock.unlockRead(closeReadLock);
}
});
2021-06-27 15:06:48 +02:00
}
/**
 * Asks RocksDB to verify the checksums of this database.
 *
 * @throws DBException if the verification fails
 */
@Override
public void verifyChecksum() {
    final long stamp = closeLock.readLock();
    try {
        ensureOpen();
        db.verifyChecksum();
    } catch (RocksDBException verifyError) {
        throw new DBException("Failed to verify checksum of database \"" + getDatabaseName() + "\"", verifyError);
    } finally {
        closeLock.unlockRead(stamp);
    }
}
@Override
2023-02-28 23:10:31 +01:00
public void compact() {
this.forceCompaction(getLastVolumeId());
}
2021-05-03 21:41:51 +02:00
@Override
public void flush() {
try (var fo = new FlushOptions().setWaitForFlush(true)) {
this.flush(fo);
} catch (RocksDBException ex) {
if (!"ShutdownInProgress".equals(ex.getMessage())) {
throw new DBException(ex);
}
logger.warn("Shutdown in progress. Flush cancelled", ex);
}
2021-05-03 21:41:51 +02:00
}
/**
 * Returns the meter registry used by this database.
 */
@Override
public MeterRegistry getMeterRegistry() {
    return this.meterRegistry;
}
2020-12-07 22:15:18 +01:00
@Override
public LLSnapshot takeSnapshot() {
if (!ALLOW_SNAPSHOTS) throw new UnsupportedOperationException("Snapshots are disabled!");
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
return snapshotTime.record(() -> {
var snapshot = db.getSnapshot();
long currentSnapshotSequenceNumber = nextSnapshotNumbers.getAndIncrement();
this.snapshotsHandles.put(currentSnapshotSequenceNumber, snapshot);
return new LLSnapshot(currentSnapshotSequenceNumber);
});
} finally {
closeLock.unlockRead(closeReadLock);
}
2020-12-07 22:15:18 +01:00
}
@Override
public void releaseSnapshot(LLSnapshot snapshot) {
var closeReadLock = closeLock.readLock();
try (var dbSnapshot = this.snapshotsHandles.remove(snapshot.getSequenceNumber())) {
if (dbSnapshot == null) {
throw new DBException("Snapshot " + snapshot.getSequenceNumber() + " not found!");
}
if (!db.isOwningHandle()) {
return;
}
db.releaseSnapshot(dbSnapshot);
} finally {
closeLock.unlockRead(closeReadLock);
}
2020-12-07 22:15:18 +01:00
}
@Override
public void close() {
closeRequested = true;
if (statistics != null) {
statistics.close();
statistics = null;
}
try {
flushAndCloseDb(db,
standardCache,
new ArrayList<>(handles.values())
);
handles.values().forEach(columnFamilyHandleRocksObj -> {
if (LLUtils.isAccessible(columnFamilyHandleRocksObj)) {
columnFamilyHandleRocksObj.close();
}
});
handles.clear();
deleteUnusedOldLogFiles();
2023-03-20 00:36:27 +01:00
} catch (Exception e) {
throw new DBException("Failed to close", e);
}
2020-12-07 22:15:18 +01:00
}
private void pauseWrites() {
try {
2022-08-15 23:07:17 +02:00
db.pauseBackgroundWork();
db.disableFileDeletions();
} catch (RocksDBException e) {
throw new DBException(e);
}
2022-08-15 23:07:17 +02:00
}
private void resumeWrites() {
try {
2022-08-15 23:07:17 +02:00
db.continueBackgroundWork();
db.enableFileDeletions(false);
} catch (RocksDBException e) {
throw new DBException(e);
}
2022-08-15 23:07:17 +02:00
}
2020-12-07 22:15:18 +01:00
/**
* Call this method ONLY AFTER flushing completely a db and closing it!
*/
2021-01-30 00:24:55 +01:00
@SuppressWarnings("unused")
2020-12-07 22:15:18 +01:00
private void deleteUnusedOldLogFiles() {
if (!DELETE_LOG_FILES) {
return;
}
2020-12-07 22:15:18 +01:00
Path basePath = dbPath;
try {
try (var f = Files.walk(basePath, 1)) {
f.filter(p -> !p.equals(basePath)).filter(p -> {
var fileName = p.getFileName().toString();
if (fileName.startsWith("LOG.old.")) {
var parts = fileName.split("\\.");
if (parts.length == 3) {
try {
long nameSuffix = Long.parseUnsignedLong(parts[2]);
return true;
} catch (NumberFormatException ex) {
return false;
2020-12-07 22:15:18 +01:00
}
}
}
if (fileName.endsWith(".log")) {
var parts = fileName.split("\\.");
if (parts.length == 2) {
try {
int name = Integer.parseUnsignedInt(parts[0]);
return true;
} catch (NumberFormatException ex) {
return false;
2020-12-07 22:15:18 +01:00
}
}
}
return false;
}).filter(p -> {
try {
BasicFileAttributes attrs = Files.readAttributes(p, BasicFileAttributes.class);
if (attrs.isRegularFile() && !attrs.isSymbolicLink() && !attrs.isDirectory()) {
long ctime = attrs.creationTime().toMillis();
long atime = attrs.lastAccessTime().toMillis();
long mtime = attrs.lastModifiedTime().toMillis();
long lastTime = Math.max(Math.max(ctime, atime), mtime);
long safeTime;
if (p.getFileName().toString().startsWith("LOG.old.")) {
safeTime = System.currentTimeMillis() - Duration.ofHours(24).toMillis();
} else {
safeTime = System.currentTimeMillis() - Duration.ofHours(12).toMillis();
}
if (lastTime < safeTime) {
return true;
2020-12-07 22:15:18 +01:00
}
}
} catch (IOException ex) {
logger.error("Error when deleting unused log files", ex);
2020-12-07 22:15:18 +01:00
return false;
}
return false;
}).forEach(path -> {
try {
Files.deleteIfExists(path);
System.out.println("Deleted log file \"" + path + "\"");
} catch (IOException e) {
logger.error(MARKER_ROCKSDB, "Failed to delete log file \"" + path + "\"", e);
}
});
}
2020-12-07 22:15:18 +01:00
} catch (IOException ex) {
2021-09-10 12:13:52 +02:00
logger.error(MARKER_ROCKSDB, "Failed to delete unused log files", ex);
2020-12-07 22:15:18 +01:00
}
}
2020-12-07 22:15:18 +01:00
}