Secure database shutdown, deduplicate compaction script

This commit is contained in:
Andrea Cavalli 2022-04-30 01:49:44 +02:00
parent 9d16ccdd9e
commit e962ae6336
7 changed files with 596 additions and 559 deletions

View File

@ -23,14 +23,18 @@ import it.cavallium.dbengine.database.SafeCloseable;
import it.cavallium.dbengine.database.disk.LLLocalDictionary.ReleasableSliceImplWithRelease;
import it.cavallium.dbengine.database.disk.LLLocalDictionary.ReleasableSliceImplWithoutRelease;
import it.cavallium.dbengine.database.serialization.SerializationException;
import it.cavallium.dbengine.rpc.current.data.Column;
import it.cavallium.dbengine.rpc.current.data.NamedColumnOptions;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.StampedLock;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.NotNull;
@ -72,7 +76,7 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
private final ColumnFamilyHandle cfh;
protected final MeterRegistry meterRegistry;
protected final Lock accessibilityLock;
protected final StampedLock closeLock;
protected final String columnName;
protected final DistributionSummary keyBufferSize;
@ -106,7 +110,7 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
String databaseName,
ColumnFamilyHandle cfh,
MeterRegistry meterRegistry,
Lock accessibilityLock) {
StampedLock closeLock) {
this.db = db;
this.nettyDirect = nettyDirect && alloc.getAllocationType() == OFF_HEAP;
this.alloc = alloc;
@ -119,7 +123,7 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
}
this.columnName = columnName;
this.meterRegistry = meterRegistry;
this.accessibilityLock = accessibilityLock;
this.closeLock = closeLock;
this.keyBufferSize = DistributionSummary
.builder("buffer.size.distribution")
@ -327,21 +331,26 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
return cfh;
}
@Override
public @Nullable Buffer get(@NotNull ReadOptions readOptions, Buffer key)
throws RocksDBException {
protected void ensureOpen() {
if (Schedulers.isInNonBlockingThread()) {
throw new UnsupportedOperationException("Called dbGet in a nonblocking thread");
throw new UnsupportedOperationException("Called in a nonblocking thread");
}
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
ensureOwned(db);
ensureOwned(cfh);
}
if (!readOptions.isOwningHandle()) {
throw new IllegalStateException("ReadOptions is closed");
protected void ensureOwned(org.rocksdb.RocksObject rocksObject) {
if (!rocksObject.isOwningHandle()) {
throw new IllegalStateException("Not owning handle");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
@Override
public @Nullable Buffer get(@NotNull ReadOptions readOptions, Buffer key) throws RocksDBException {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(readOptions);
keyBufferSize.record(key.readableBytes());
int readAttemptsCount = 0;
try {
@ -372,7 +381,8 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
assert resultBuffer.writerOffset() == 0;
var resultWritable = ((WritableComponent) resultBuffer).writableBuffer();
var keyMayExist = db.keyMayExist(cfh, readOptions, keyNioBuffer.rewind(), resultWritable.clear());
var keyMayExist = db.keyMayExist(cfh, readOptions, keyNioBuffer.rewind(),
resultWritable.clear());
KeyMayExistEnum keyMayExistState = keyMayExist.exists;
int keyMayExistValueLength = keyMayExist.valueLength;
// At the beginning, size reflects the expected size, then it becomes the real data size
@ -492,23 +502,18 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
} finally {
readAttempts.record(readAttemptsCount);
}
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public void put(@NotNull WriteOptions writeOptions, Buffer key, Buffer value) throws RocksDBException {
var closeReadLock = closeLock.readLock();
try {
if (Schedulers.isInNonBlockingThread()) {
throw new UnsupportedOperationException("Called dbPut in a nonblocking thread");
}
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!writeOptions.isOwningHandle()) {
throw new IllegalStateException("WriteOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
try {
ensureOpen();
ensureOwned(writeOptions);
assert key.isAccessible();
assert value.isAccessible();
this.keyBufferSize.record(key.readableBytes());
@ -570,22 +575,17 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
writeOptions.close();
}
}
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public boolean exists(@NotNull ReadOptions readOptions, Buffer key) throws RocksDBException {
if (Schedulers.isInNonBlockingThread()) {
throw new UnsupportedOperationException("Called containsKey in a nonblocking thread");
}
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!readOptions.isOwningHandle()) {
throw new IllegalStateException("ReadOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(readOptions);
if (nettyDirect) {
// Get the key nio buffer to pass to RocksDB
ByteBuffer keyNioBuffer;
@ -655,22 +655,17 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
}
return found;
}
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public boolean mayExists(@NotNull ReadOptions readOptions, Buffer key) throws RocksDBException {
if (Schedulers.isInNonBlockingThread()) {
throw new UnsupportedOperationException("Called containsKey in a nonblocking thread");
}
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!readOptions.isOwningHandle()) {
throw new IllegalStateException("ReadOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(readOptions);
if (nettyDirect) {
// Get the key nio buffer to pass to RocksDB
ByteBuffer keyNioBuffer;
@ -706,19 +701,17 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
}
}
}
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public void delete(WriteOptions writeOptions, Buffer key) throws RocksDBException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!writeOptions.isOwningHandle()) {
throw new IllegalStateException("WriteOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(writeOptions);
keyBufferSize.record(key.readableBytes());
if (nettyDirect) {
// Get the key nio buffer to pass to RocksDB
@ -748,115 +741,108 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
} else {
db.delete(cfh, writeOptions, LLUtils.toArray(key));
}
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public void delete(WriteOptions writeOptions, byte[] key) throws RocksDBException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!writeOptions.isOwningHandle()) {
throw new IllegalStateException("WriteOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(writeOptions);
keyBufferSize.record(key.length);
db.delete(cfh, writeOptions, key);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public List<byte[]> multiGetAsList(ReadOptions readOptions, List<byte[]> keys) throws RocksDBException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!readOptions.isOwningHandle()) {
throw new IllegalStateException("ReadOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(readOptions);
for (byte[] key : keys) {
keyBufferSize.record(key.length);
}
var columnFamilyHandles = new RepeatedElementList<>(cfh, keys.size());
return db.multiGetAsList(readOptions, columnFamilyHandles, keys);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public void suggestCompactRange() throws RocksDBException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
db.suggestCompactRange(cfh);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public void compactRange(byte[] begin, byte[] end, CompactRangeOptions options)
throws RocksDBException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!options.isOwningHandle()) {
throw new IllegalStateException("CompactRangeOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
public void compactRange(byte[] begin, byte[] end, CompactRangeOptions options) throws RocksDBException {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(options);
db.compactRange(cfh, begin, end, options);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public void flush(FlushOptions options) throws RocksDBException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!options.isOwningHandle()) {
throw new IllegalStateException("FlushOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(options);
db.flush(options, cfh);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public void flushWal(boolean sync) throws RocksDBException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
db.flushWal(sync);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public long getLongProperty(String property) throws RocksDBException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
return db.getLongProperty(cfh, property);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
@Override
public void write(WriteOptions writeOptions, WriteBatch writeBatch) throws RocksDBException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!writeOptions.isOwningHandle()) {
throw new IllegalStateException("WriteOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(writeOptions);
ensureOwned(writeBatch);
db.write(writeOptions, writeBatch);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
/**
@ -872,28 +858,24 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
Buffer key,
BinarySerializationFunction updater,
UpdateAtomicResultMode returnMode) throws IOException {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!readOptions.isOwningHandle()) {
throw new IllegalStateException("ReadOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(readOptions);
try {
keyBufferSize.record(key.readableBytes());
startedUpdate.increment();
accessibilityLock.lock();
return updateAtomicImpl(readOptions, writeOptions, key, updater, returnMode);
} catch (IOException e) {
throw e;
} catch (Exception e) {
throw new IOException(e);
} finally {
accessibilityLock.unlock();
endedUpdate.increment();
}
} finally {
closeLock.unlockRead(closeReadLock);
}
}
protected final void recordAtomicUpdateTime(boolean changed, boolean prevSet, boolean newSet, long initTime) {
@ -923,16 +905,12 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
@Override
@NotNull
public RocksDBIterator newIterator(@NotNull ReadOptions readOptions) {
if (!db.isOwningHandle()) {
throw new IllegalStateException("Database is closed");
}
if (!readOptions.isOwningHandle()) {
throw new IllegalStateException("ReadOptions is closed");
}
if (!cfh.isOwningHandle()) {
throw new IllegalStateException("Column family is closed");
}
return new RocksDBIterator(db.newIterator(cfh, readOptions),
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
ensureOwned(readOptions);
var it = db.newIterator(cfh, readOptions);
return new RocksDBIterator(it,
nettyDirect,
this.startedIterSeek,
this.endedIterSeek,
@ -941,6 +919,9 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
this.endedIterNext,
this.iterNextTime
);
} finally {
closeLock.unlockRead(closeReadLock);
}
}
protected final Buffer applyUpdateAndCloseIfNecessary(BinarySerializationFunction updater,
@ -957,31 +938,18 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
return newData;
}
protected int getLastLevel() {
return RocksDBUtils.getLastLevel(db, cfh);
}
@Override
public final void forceCompaction(int volumeId) throws RocksDBException {
List<String> files = new ArrayList<>();
var meta = db.getColumnFamilyMetaData(cfh);
int bottommostLevel = -1;
for (LevelMetaData level : meta.levels()) {
bottommostLevel = Math.max(bottommostLevel, level.level());
}
int count = 0;
x: for (LevelMetaData level : meta.levels()) {
for (SstFileMetaData file : level.files()) {
if (file.fileName().endsWith(".sst")) {
files.add(file.fileName());
count++;
if (count >= 4) {
break x;
}
}
}
}
try (var co = new CompactionOptions()) {
if (!files.isEmpty() && bottommostLevel != -1) {
db.compactFiles(co, cfh, files, bottommostLevel, volumeId, null);
}
db.compactRange(cfh);
public final void forceCompaction(int volumeId) {
var closeReadLock = closeLock.readLock();
try {
ensureOpen();
RocksDBUtils.forceCompaction(db, db.getName(), cfh, volumeId, logger);
} finally {
closeLock.unlockRead(closeReadLock);
}
}

View File

@ -40,6 +40,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
@ -47,8 +48,8 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.StampedLock;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.logging.log4j.LogManager;
@ -62,7 +63,6 @@ import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.CompactRangeOptions;
import org.rocksdb.CompactRangeOptions.BottommostLevelCompaction;
import org.rocksdb.CompactionJobInfo;
import org.rocksdb.CompactionOptions;
import org.rocksdb.CompactionPriority;
@ -132,7 +132,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
private final HashMap<String, PersistentCache> persistentCaches;
private final ConcurrentHashMap<Long, Snapshot> snapshotsHandles = new ConcurrentHashMap<>();
private final AtomicLong nextSnapshotNumbers = new AtomicLong(1);
private final ReadWriteLock shutdownLock = new ReentrantReadWriteLock();
private final StampedLock closeLock = new StampedLock();
private volatile boolean closed = false;
@SuppressWarnings("SwitchStatementWithTooFewBranches")
@ -253,6 +253,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
}
if (!columnOptions.levels().isEmpty()) {
columnFamilyOptions.setNumLevels(columnOptions.levels().size());
var firstLevelOptions = getRocksLevelOptions(columnOptions.levels().get(0));
columnFamilyOptions.setCompressionType(firstLevelOptions.compressionType);
columnFamilyOptions.setCompressionOptions(firstLevelOptions.compressionOptions);
@ -269,9 +270,9 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
.map(v -> v.compression().getType())
.toList());
} else {
columnFamilyOptions.setNumLevels(7);
List<CompressionType> compressionTypes = new ArrayList<>(7);
for (int i = 0; i < 7; i++) {
columnFamilyOptions.setNumLevels(6);
List<CompressionType> compressionTypes = new ArrayList<>(6);
for (int i = 0; i < 6; i++) {
if (i < 2) {
compressionTypes.add(CompressionType.NO_COMPRESSION);
} else {
@ -560,60 +561,27 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
return paths.size() - 1;
}
public void forceCompaction(int volumeId) throws RocksDBException {
try (var co = new CompactionOptions()
.setCompression(CompressionType.LZ4_COMPRESSION)
.setMaxSubcompactions(0)
.setOutputFileSizeLimit(2 * SizeUnit.GB)) {
for (ColumnFamilyHandle cfh : this.handles.values()) {
List<String> files = new ArrayList<>();
var meta = db.getColumnFamilyMetaData(cfh);
int bottommostLevel = -1;
for (LevelMetaData level : meta.levels()) {
bottommostLevel = Math.max(bottommostLevel, level.level());
public int getLastLevel(Column column) {
return databaseOptions
.columnOptions()
.stream()
.filter(namedColumnOptions -> namedColumnOptions.columnName().equals(column.name()))
.findFirst()
.map(NamedColumnOptions::levels)
.filter(levels -> !levels.isEmpty())
.or(() -> Optional.of(databaseOptions.defaultColumnOptions().levels()).filter(levels -> !levels.isEmpty()))
.map(List::size)
.orElse(6);
}
for (LevelMetaData level : meta.levels()) {
if (level.level() < bottommostLevel) {
for (SstFileMetaData file : level.files()) {
if (file.fileName().endsWith(".sst")) {
files.add(file.fileName());
}
}
}
}
bottommostLevel = Math.max(bottommostLevel, databaseOptions.defaultColumnOptions().levels().size() - 1);
if (!files.isEmpty() && bottommostLevel != -1) {
var partitionSize = files.size() / Runtime.getRuntime().availableProcessors();
List<List<String>> partitions;
if (partitionSize > 0) {
partitions = partition(files, files.size() / Runtime.getRuntime().availableProcessors());
} else {
partitions = List.of(files);
}
int finalBottommostLevel = bottommostLevel;
Mono.when(partitions.stream().map(partition -> Mono.<Void>fromCallable(() -> {
logger.info("Compacting {} files in database {} in column family {} to level {}",
partition.size(),
name,
new String(cfh.getName(), StandardCharsets.UTF_8),
finalBottommostLevel
);
if (!partition.isEmpty()) {
var coi = new CompactionJobInfo();
db.compactFiles(co, cfh, partition, finalBottommostLevel, volumeId, coi);
logger.info("Compacted {} files in database {} in column family {} to level {}: {}",
partition.size(),
name,
new String(cfh.getName(), StandardCharsets.UTF_8),
finalBottommostLevel,
coi.status().getCodeString()
);
}
return null;
}).subscribeOn(Schedulers.boundedElastic())).toList()).block();
}
public List<String> getColumnFiles(Column column, boolean excludeLastLevel) {
var cfh = handles.get(column);
return RocksDBUtils.getColumnFiles(db, cfh, excludeLastLevel);
}
public void forceCompaction(int volumeId) throws RocksDBException {
for (var cfh : this.handles.values()) {
RocksDBUtils.forceCompaction(db, name, cfh, volumeId, logger);
}
}
@ -660,14 +628,13 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
return name;
}
public Lock getAccessibilityLock() {
return shutdownLock.readLock();
public StampedLock getCloseLock() {
return closeLock;
}
private void flushAndCloseDb(RocksDB db, Cache standardCache, Cache compressedCache, List<ColumnFamilyHandle> handles)
throws RocksDBException {
var shutdownWriteLock = shutdownLock.writeLock();
shutdownWriteLock.lock();
var closeWriteLock = closeLock.writeLock();
try {
if (closed) {
return;
@ -715,7 +682,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
}
}
} finally {
shutdownWriteLock.unlock();
closeLock.unlockWrite(closeWriteLock);
}
}
@ -1037,7 +1004,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
private RocksDBColumn getRocksDBColumn(RocksDB db, ColumnFamilyHandle cfh) {
var nettyDirect = databaseOptions.allowNettyDirect();
var accessibilityLock = getAccessibilityLock();
var closeLock = getCloseLock();
if (db instanceof OptimisticTransactionDB optimisticTransactionDB) {
return new OptimisticRocksDBColumn(optimisticTransactionDB,
nettyDirect,
@ -1045,7 +1012,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
name,
cfh,
meterRegistry,
accessibilityLock
closeLock
);
} else if (db instanceof TransactionDB transactionDB) {
return new PessimisticRocksDBColumn(transactionDB,
@ -1054,10 +1021,10 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
name,
cfh,
meterRegistry,
accessibilityLock
closeLock
);
} else {
return new StandardRocksDBColumn(db, nettyDirect, allocator, name, cfh, meterRegistry, accessibilityLock);
return new StandardRocksDBColumn(db, nettyDirect, allocator, name, cfh, meterRegistry, closeLock);
}
}
@ -1225,8 +1192,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
public Mono<LLSnapshot> takeSnapshot() {
return Mono
.fromCallable(() -> snapshotTime.recordCallable(() -> {
var shutdownReadLock = shutdownLock.readLock();
shutdownReadLock.lock();
var closeReadLock = closeLock.readLock();
try {
if (closed) {
throw new IllegalStateException("Database closed");
@ -1236,7 +1202,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
this.snapshotsHandles.put(currentSnapshotSequenceNumber, snapshot);
return new LLSnapshot(currentSnapshotSequenceNumber);
} finally {
shutdownReadLock.unlock();
closeLock.unlockRead(closeReadLock);
}
}))
.subscribeOn(dbRScheduler);
@ -1246,8 +1212,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
public Mono<Void> releaseSnapshot(LLSnapshot snapshot) {
return Mono
.<Void>fromCallable(() -> {
var shutdownReadLock = shutdownLock.readLock();
shutdownReadLock.lock();
var closeReadLock = closeLock.readLock();
try {
if (closed) {
throw new IllegalStateException("Database closed");
@ -1262,7 +1227,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
db.releaseSnapshot(dbSnapshot);
return null;
} finally {
shutdownReadLock.unlock();
closeLock.unlockRead(closeReadLock);
}
})
.subscribeOn(dbRScheduler);

View File

@ -15,6 +15,7 @@ import java.io.IOException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.LockSupport;
import java.util.concurrent.locks.StampedLock;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.rocksdb.ColumnFamilyHandle;
@ -39,8 +40,8 @@ public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<Optimis
String databaseName,
ColumnFamilyHandle cfh,
MeterRegistry meterRegistry,
Lock accessibilityLock) {
super(db, nettyDirect, alloc, databaseName, cfh, meterRegistry, accessibilityLock);
StampedLock closeLock) {
super(db, nettyDirect, alloc, databaseName, cfh, meterRegistry, closeLock);
this.optimisticAttempts = DistributionSummary
.builder("db.optimistic.attempts.distribution")
.publishPercentiles(0.2, 0.5, 0.95)

View File

@ -11,6 +11,7 @@ import it.cavallium.dbengine.database.LLDelta;
import it.cavallium.dbengine.database.LLUtils;
import java.io.IOException;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.StampedLock;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.rocksdb.ColumnFamilyHandle;
@ -32,8 +33,8 @@ public final class PessimisticRocksDBColumn extends AbstractRocksDBColumn<Transa
String dbName,
ColumnFamilyHandle cfh,
MeterRegistry meterRegistry,
Lock accessibilityLock) {
super(db, nettyDirect, alloc, dbName, cfh, meterRegistry, accessibilityLock);
StampedLock closeLock) {
super(db, nettyDirect, alloc, dbName, cfh, meterRegistry, closeLock);
}
@Override

View File

@ -56,8 +56,7 @@ public sealed interface RocksDBColumn permits AbstractRocksDBColumn {
void put(@NotNull WriteOptions writeOptions, Buffer key, Buffer value) throws RocksDBException;
default void put(@NotNull WriteOptions writeOptions, byte[] key, byte[] value)
throws RocksDBException {
default void put(@NotNull WriteOptions writeOptions, byte[] key, byte[] value) throws RocksDBException {
var allocator = getAllocator();
try (var keyBuf = allocator.allocate(key.length)) {
keyBuf.writeBytes(key);

View File

@ -0,0 +1,102 @@
package it.cavallium.dbengine.database.disk;
import static com.google.common.collect.Lists.partition;
import it.cavallium.dbengine.rpc.current.data.Column;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.CompactionJobInfo;
import org.rocksdb.CompactionOptions;
import org.rocksdb.CompressionType;
import org.rocksdb.LevelMetaData;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.SstFileMetaData;
import org.rocksdb.util.SizeUnit;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
// NOTE(review): utility class — consider adding a private constructor to prevent instantiation.
public class RocksDBUtils {

	/**
	 * Returns the number of LSM levels configured for the given column family,
	 * falling back to 6 when {@code numberLevels} reports 0.
	 *
	 * <p>NOTE(review): this returns the level <em>count</em>, not the index of the
	 * bottommost level (valid indices are {@code 0..numberLevels-1}). Callers here
	 * ({@link #getColumnFiles} and {@link #forceCompaction}) use the returned value
	 * directly as a level bound / compaction target level — confirm this matches the
	 * database's actual {@code numLevels} configuration, otherwise it is off by one.
	 *
	 * @param db  the open RocksDB instance
	 * @param cfh the column family to inspect
	 * @return {@code db.numberLevels(cfh)}, or 6 if that call returns 0
	 */
	public static int getLastLevel(RocksDB db, ColumnFamilyHandle cfh) {
		var lastLevel = db.numberLevels(cfh);
		if (lastLevel == 0) {
			// Fallback when the column family reports no level configuration;
			// 6 presumably mirrors the project's default level count — TODO confirm.
			return 6;
		} else {
			return lastLevel;
		}
	}

	/**
	 * Lists the file names of all {@code .sst} files belonging to the column family.
	 *
	 * @param db               the open RocksDB instance
	 * @param cfh              the column family to inspect
	 * @param excludeLastLevel when {@code true}, files on levels {@code >= getLastLevel(db, cfh)}
	 *                         are omitted (i.e. only levels strictly below that bound are returned)
	 * @return the matching SST file names, in level order as reported by the column family metadata
	 */
	public static List<String> getColumnFiles(RocksDB db, ColumnFamilyHandle cfh, boolean excludeLastLevel) {
		List<String> files = new ArrayList<>();
		var meta = db.getColumnFamilyMetaData(cfh);
		// -1 disables the level filter: every level passes the check below.
		var lastLevel = excludeLastLevel ? getLastLevel(db, cfh) : -1;
		for (LevelMetaData level : meta.levels()) {
			if (!excludeLastLevel || level.level() < lastLevel) {
				for (SstFileMetaData file : level.files()) {
					// Only collect SST data files; skip any other artifacts in the metadata.
					if (file.fileName().endsWith(".sst")) {
						files.add(file.fileName());
					}
				}
			}
		}
		return files;
	}

	/**
	 * Compacts all non-bottommost SST files of the column family down to the bottommost level,
	 * splitting the work into per-CPU partitions executed in parallel on the Reactor
	 * bounded-elastic scheduler. Blocks the calling thread until every partition finishes.
	 *
	 * <p>Per-partition failures are logged and swallowed (the remaining partitions still run);
	 * {@code Mono.whenDelayError} further ensures all partitions are attempted before any
	 * error would propagate.
	 *
	 * @param db        the open RocksDB instance
	 * @param logDbName database name used only for log messages
	 * @param cfh       the column family to compact
	 * @param volumeId  target path id passed to {@code RocksDB#compactFiles}
	 * @param logger    sink for progress and failure messages
	 */
	public static void forceCompaction(RocksDB db,
			String logDbName,
			ColumnFamilyHandle cfh,
			int volumeId,
			Logger logger) {
		// CompactionOptions is closed automatically after all partitions complete
		// (safe because .block() below keeps it alive for the whole run).
		try (var co = new CompactionOptions()
				.setCompression(CompressionType.LZ4_COMPRESSION)
				.setMaxSubcompactions(0)
				.setOutputFileSizeLimit(2 * SizeUnit.GB)) {
			// true: exclude files already on the bottommost level — they need no compaction.
			List<String> filesToCompact = getColumnFiles(db, cfh, true);
			if (!filesToCompact.isEmpty()) {
				// One partition per available processor; integer division may yield 0
				// when there are fewer files than CPUs, handled below.
				var partitionSize = filesToCompact.size() / Runtime.getRuntime().availableProcessors();
				List<List<String>> partitions;
				if (partitionSize > 0) {
					partitions = partition(filesToCompact, partitionSize);
				} else {
					// Fewer files than processors: compact everything as a single batch.
					partitions = List.of(filesToCompact);
				}
				// NOTE(review): getLastLevel returns the level count; RocksDB compactFiles
				// expects a target level index in 0..numLevels-1 — verify this is in range.
				int finalBottommostLevel = getLastLevel(db, cfh);
				// Launch every partition concurrently and wait for all of them,
				// delaying error propagation until all have been attempted.
				Mono.whenDelayError(partitions.stream().map(partition -> Mono.<Void>fromCallable(() -> {
					logger.info("Compacting {} files in database {} in column family {} to level {}",
							partition.size(),
							logDbName,
							new String(cfh.getName(), StandardCharsets.UTF_8),
							finalBottommostLevel
					);
					if (!partition.isEmpty()) {
						var coi = new CompactionJobInfo();
						try {
							db.compactFiles(co, cfh, partition, finalBottommostLevel, volumeId, coi);
							logger.info("Compacted {} files in database {} in column family {} to level {}: {}",
									partition.size(),
									logDbName,
									new String(cfh.getName(), StandardCharsets.UTF_8),
									finalBottommostLevel,
									coi.status().getCodeString()
							);
						} catch (Throwable ex) {
							// Best-effort: log and continue so other partitions still compact.
							logger.error("Failed to compact {} files in database {} in column family {} to level {}",
									partition.size(),
									logDbName,
									new String(cfh.getName(), StandardCharsets.UTF_8),
									finalBottommostLevel,
									ex
							);
						}
					}
					return null;
				}).subscribeOn(Schedulers.boundedElastic())).toList()).block();
			}
		}
	}
}

View File

@ -9,6 +9,7 @@ import it.cavallium.dbengine.database.LLDelta;
import it.cavallium.dbengine.database.LLUtils;
import java.io.IOException;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.StampedLock;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.rocksdb.ColumnFamilyHandle;
@ -23,8 +24,8 @@ public final class StandardRocksDBColumn extends AbstractRocksDBColumn<RocksDB>
boolean nettyDirect,
BufferAllocator alloc,
String dbName,
ColumnFamilyHandle cfh, MeterRegistry meterRegistry, Lock accessibilityLock) {
super(db, nettyDirect, alloc, dbName, cfh, meterRegistry, accessibilityLock);
ColumnFamilyHandle cfh, MeterRegistry meterRegistry, StampedLock closeLock) {
super(db, nettyDirect, alloc, dbName, cfh, meterRegistry, closeLock);
}
@Override