Secure database shutdown, deduplicate compaction script
This commit is contained in:
parent
9d16ccdd9e
commit
e962ae6336
@ -23,14 +23,18 @@ import it.cavallium.dbengine.database.SafeCloseable;
|
||||
import it.cavallium.dbengine.database.disk.LLLocalDictionary.ReleasableSliceImplWithRelease;
|
||||
import it.cavallium.dbengine.database.disk.LLLocalDictionary.ReleasableSliceImplWithoutRelease;
|
||||
import it.cavallium.dbengine.database.serialization.SerializationException;
|
||||
import it.cavallium.dbengine.rpc.current.data.Column;
|
||||
import it.cavallium.dbengine.rpc.current.data.NamedColumnOptions;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.StampedLock;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
@ -72,7 +76,7 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
private final ColumnFamilyHandle cfh;
|
||||
|
||||
protected final MeterRegistry meterRegistry;
|
||||
protected final Lock accessibilityLock;
|
||||
protected final StampedLock closeLock;
|
||||
protected final String columnName;
|
||||
|
||||
protected final DistributionSummary keyBufferSize;
|
||||
@ -106,7 +110,7 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
String databaseName,
|
||||
ColumnFamilyHandle cfh,
|
||||
MeterRegistry meterRegistry,
|
||||
Lock accessibilityLock) {
|
||||
StampedLock closeLock) {
|
||||
this.db = db;
|
||||
this.nettyDirect = nettyDirect && alloc.getAllocationType() == OFF_HEAP;
|
||||
this.alloc = alloc;
|
||||
@ -119,7 +123,7 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
}
|
||||
this.columnName = columnName;
|
||||
this.meterRegistry = meterRegistry;
|
||||
this.accessibilityLock = accessibilityLock;
|
||||
this.closeLock = closeLock;
|
||||
|
||||
this.keyBufferSize = DistributionSummary
|
||||
.builder("buffer.size.distribution")
|
||||
@ -327,21 +331,26 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
return cfh;
|
||||
}
|
||||
|
||||
@Override
|
||||
public @Nullable Buffer get(@NotNull ReadOptions readOptions, Buffer key)
|
||||
throws RocksDBException {
|
||||
protected void ensureOpen() {
|
||||
if (Schedulers.isInNonBlockingThread()) {
|
||||
throw new UnsupportedOperationException("Called dbGet in a nonblocking thread");
|
||||
throw new UnsupportedOperationException("Called in a nonblocking thread");
|
||||
}
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
ensureOwned(db);
|
||||
ensureOwned(cfh);
|
||||
}
|
||||
if (!readOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("ReadOptions is closed");
|
||||
|
||||
protected void ensureOwned(org.rocksdb.RocksObject rocksObject) {
|
||||
if (!rocksObject.isOwningHandle()) {
|
||||
throw new IllegalStateException("Not owning handle");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
|
||||
@Override
|
||||
public @Nullable Buffer get(@NotNull ReadOptions readOptions, Buffer key) throws RocksDBException {
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(readOptions);
|
||||
keyBufferSize.record(key.readableBytes());
|
||||
int readAttemptsCount = 0;
|
||||
try {
|
||||
@ -372,7 +381,8 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
assert resultBuffer.writerOffset() == 0;
|
||||
var resultWritable = ((WritableComponent) resultBuffer).writableBuffer();
|
||||
|
||||
var keyMayExist = db.keyMayExist(cfh, readOptions, keyNioBuffer.rewind(), resultWritable.clear());
|
||||
var keyMayExist = db.keyMayExist(cfh, readOptions, keyNioBuffer.rewind(),
|
||||
resultWritable.clear());
|
||||
KeyMayExistEnum keyMayExistState = keyMayExist.exists;
|
||||
int keyMayExistValueLength = keyMayExist.valueLength;
|
||||
// At the beginning, size reflects the expected size, then it becomes the real data size
|
||||
@ -492,23 +502,18 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
} finally {
|
||||
readAttempts.record(readAttemptsCount);
|
||||
}
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void put(@NotNull WriteOptions writeOptions, Buffer key, Buffer value) throws RocksDBException {
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
if (Schedulers.isInNonBlockingThread()) {
|
||||
throw new UnsupportedOperationException("Called dbPut in a nonblocking thread");
|
||||
}
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!writeOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("WriteOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(writeOptions);
|
||||
assert key.isAccessible();
|
||||
assert value.isAccessible();
|
||||
this.keyBufferSize.record(key.readableBytes());
|
||||
@ -570,22 +575,17 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
writeOptions.close();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean exists(@NotNull ReadOptions readOptions, Buffer key) throws RocksDBException {
|
||||
if (Schedulers.isInNonBlockingThread()) {
|
||||
throw new UnsupportedOperationException("Called containsKey in a nonblocking thread");
|
||||
}
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!readOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("ReadOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(readOptions);
|
||||
if (nettyDirect) {
|
||||
// Get the key nio buffer to pass to RocksDB
|
||||
ByteBuffer keyNioBuffer;
|
||||
@ -655,22 +655,17 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
}
|
||||
return found;
|
||||
}
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean mayExists(@NotNull ReadOptions readOptions, Buffer key) throws RocksDBException {
|
||||
if (Schedulers.isInNonBlockingThread()) {
|
||||
throw new UnsupportedOperationException("Called containsKey in a nonblocking thread");
|
||||
}
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!readOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("ReadOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(readOptions);
|
||||
if (nettyDirect) {
|
||||
// Get the key nio buffer to pass to RocksDB
|
||||
ByteBuffer keyNioBuffer;
|
||||
@ -706,19 +701,17 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void delete(WriteOptions writeOptions, Buffer key) throws RocksDBException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!writeOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("WriteOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(writeOptions);
|
||||
keyBufferSize.record(key.readableBytes());
|
||||
if (nettyDirect) {
|
||||
// Get the key nio buffer to pass to RocksDB
|
||||
@ -748,115 +741,108 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
} else {
|
||||
db.delete(cfh, writeOptions, LLUtils.toArray(key));
|
||||
}
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void delete(WriteOptions writeOptions, byte[] key) throws RocksDBException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!writeOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("WriteOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(writeOptions);
|
||||
keyBufferSize.record(key.length);
|
||||
db.delete(cfh, writeOptions, key);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<byte[]> multiGetAsList(ReadOptions readOptions, List<byte[]> keys) throws RocksDBException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!readOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("ReadOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(readOptions);
|
||||
for (byte[] key : keys) {
|
||||
keyBufferSize.record(key.length);
|
||||
}
|
||||
var columnFamilyHandles = new RepeatedElementList<>(cfh, keys.size());
|
||||
return db.multiGetAsList(readOptions, columnFamilyHandles, keys);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void suggestCompactRange() throws RocksDBException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
db.suggestCompactRange(cfh);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void compactRange(byte[] begin, byte[] end, CompactRangeOptions options)
|
||||
throws RocksDBException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!options.isOwningHandle()) {
|
||||
throw new IllegalStateException("CompactRangeOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
public void compactRange(byte[] begin, byte[] end, CompactRangeOptions options) throws RocksDBException {
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(options);
|
||||
db.compactRange(cfh, begin, end, options);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flush(FlushOptions options) throws RocksDBException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!options.isOwningHandle()) {
|
||||
throw new IllegalStateException("FlushOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(options);
|
||||
db.flush(options, cfh);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flushWal(boolean sync) throws RocksDBException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
db.flushWal(sync);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLongProperty(String property) throws RocksDBException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
return db.getLongProperty(cfh, property);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(WriteOptions writeOptions, WriteBatch writeBatch) throws RocksDBException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!writeOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("WriteOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(writeOptions);
|
||||
ensureOwned(writeBatch);
|
||||
db.write(writeOptions, writeBatch);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -872,28 +858,24 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
Buffer key,
|
||||
BinarySerializationFunction updater,
|
||||
UpdateAtomicResultMode returnMode) throws IOException {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!readOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("ReadOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(readOptions);
|
||||
try {
|
||||
keyBufferSize.record(key.readableBytes());
|
||||
startedUpdate.increment();
|
||||
accessibilityLock.lock();
|
||||
return updateAtomicImpl(readOptions, writeOptions, key, updater, returnMode);
|
||||
} catch (IOException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
throw new IOException(e);
|
||||
} finally {
|
||||
accessibilityLock.unlock();
|
||||
endedUpdate.increment();
|
||||
}
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
protected final void recordAtomicUpdateTime(boolean changed, boolean prevSet, boolean newSet, long initTime) {
|
||||
@ -923,16 +905,12 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
@Override
|
||||
@NotNull
|
||||
public RocksDBIterator newIterator(@NotNull ReadOptions readOptions) {
|
||||
if (!db.isOwningHandle()) {
|
||||
throw new IllegalStateException("Database is closed");
|
||||
}
|
||||
if (!readOptions.isOwningHandle()) {
|
||||
throw new IllegalStateException("ReadOptions is closed");
|
||||
}
|
||||
if (!cfh.isOwningHandle()) {
|
||||
throw new IllegalStateException("Column family is closed");
|
||||
}
|
||||
return new RocksDBIterator(db.newIterator(cfh, readOptions),
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
ensureOwned(readOptions);
|
||||
var it = db.newIterator(cfh, readOptions);
|
||||
return new RocksDBIterator(it,
|
||||
nettyDirect,
|
||||
this.startedIterSeek,
|
||||
this.endedIterSeek,
|
||||
@ -941,6 +919,9 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
this.endedIterNext,
|
||||
this.iterNextTime
|
||||
);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
protected final Buffer applyUpdateAndCloseIfNecessary(BinarySerializationFunction updater,
|
||||
@ -957,31 +938,18 @@ public sealed abstract class AbstractRocksDBColumn<T extends RocksDB> implements
|
||||
return newData;
|
||||
}
|
||||
|
||||
protected int getLastLevel() {
|
||||
return RocksDBUtils.getLastLevel(db, cfh);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void forceCompaction(int volumeId) throws RocksDBException {
|
||||
List<String> files = new ArrayList<>();
|
||||
var meta = db.getColumnFamilyMetaData(cfh);
|
||||
int bottommostLevel = -1;
|
||||
for (LevelMetaData level : meta.levels()) {
|
||||
bottommostLevel = Math.max(bottommostLevel, level.level());
|
||||
}
|
||||
int count = 0;
|
||||
x: for (LevelMetaData level : meta.levels()) {
|
||||
for (SstFileMetaData file : level.files()) {
|
||||
if (file.fileName().endsWith(".sst")) {
|
||||
files.add(file.fileName());
|
||||
count++;
|
||||
if (count >= 4) {
|
||||
break x;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
try (var co = new CompactionOptions()) {
|
||||
if (!files.isEmpty() && bottommostLevel != -1) {
|
||||
db.compactFiles(co, cfh, files, bottommostLevel, volumeId, null);
|
||||
}
|
||||
db.compactRange(cfh);
|
||||
public final void forceCompaction(int volumeId) {
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
ensureOpen();
|
||||
RocksDBUtils.forceCompaction(db, db.getName(), cfh, volumeId, logger);
|
||||
} finally {
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -40,6 +40,7 @@ import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
@ -47,8 +48,8 @@ import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReadWriteLock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
import java.util.concurrent.locks.StampedLock;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.commons.lang3.time.StopWatch;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
@ -62,7 +63,6 @@ import org.rocksdb.ColumnFamilyDescriptor;
|
||||
import org.rocksdb.ColumnFamilyHandle;
|
||||
import org.rocksdb.ColumnFamilyOptions;
|
||||
import org.rocksdb.CompactRangeOptions;
|
||||
import org.rocksdb.CompactRangeOptions.BottommostLevelCompaction;
|
||||
import org.rocksdb.CompactionJobInfo;
|
||||
import org.rocksdb.CompactionOptions;
|
||||
import org.rocksdb.CompactionPriority;
|
||||
@ -132,7 +132,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
private final HashMap<String, PersistentCache> persistentCaches;
|
||||
private final ConcurrentHashMap<Long, Snapshot> snapshotsHandles = new ConcurrentHashMap<>();
|
||||
private final AtomicLong nextSnapshotNumbers = new AtomicLong(1);
|
||||
private final ReadWriteLock shutdownLock = new ReentrantReadWriteLock();
|
||||
private final StampedLock closeLock = new StampedLock();
|
||||
private volatile boolean closed = false;
|
||||
|
||||
@SuppressWarnings("SwitchStatementWithTooFewBranches")
|
||||
@ -253,6 +253,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
}
|
||||
|
||||
if (!columnOptions.levels().isEmpty()) {
|
||||
columnFamilyOptions.setNumLevels(columnOptions.levels().size());
|
||||
var firstLevelOptions = getRocksLevelOptions(columnOptions.levels().get(0));
|
||||
columnFamilyOptions.setCompressionType(firstLevelOptions.compressionType);
|
||||
columnFamilyOptions.setCompressionOptions(firstLevelOptions.compressionOptions);
|
||||
@ -269,9 +270,9 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
.map(v -> v.compression().getType())
|
||||
.toList());
|
||||
} else {
|
||||
columnFamilyOptions.setNumLevels(7);
|
||||
List<CompressionType> compressionTypes = new ArrayList<>(7);
|
||||
for (int i = 0; i < 7; i++) {
|
||||
columnFamilyOptions.setNumLevels(6);
|
||||
List<CompressionType> compressionTypes = new ArrayList<>(6);
|
||||
for (int i = 0; i < 6; i++) {
|
||||
if (i < 2) {
|
||||
compressionTypes.add(CompressionType.NO_COMPRESSION);
|
||||
} else {
|
||||
@ -560,60 +561,27 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
return paths.size() - 1;
|
||||
}
|
||||
|
||||
public void forceCompaction(int volumeId) throws RocksDBException {
|
||||
try (var co = new CompactionOptions()
|
||||
.setCompression(CompressionType.LZ4_COMPRESSION)
|
||||
.setMaxSubcompactions(0)
|
||||
.setOutputFileSizeLimit(2 * SizeUnit.GB)) {
|
||||
for (ColumnFamilyHandle cfh : this.handles.values()) {
|
||||
List<String> files = new ArrayList<>();
|
||||
var meta = db.getColumnFamilyMetaData(cfh);
|
||||
int bottommostLevel = -1;
|
||||
for (LevelMetaData level : meta.levels()) {
|
||||
bottommostLevel = Math.max(bottommostLevel, level.level());
|
||||
public int getLastLevel(Column column) {
|
||||
return databaseOptions
|
||||
.columnOptions()
|
||||
.stream()
|
||||
.filter(namedColumnOptions -> namedColumnOptions.columnName().equals(column.name()))
|
||||
.findFirst()
|
||||
.map(NamedColumnOptions::levels)
|
||||
.filter(levels -> !levels.isEmpty())
|
||||
.or(() -> Optional.of(databaseOptions.defaultColumnOptions().levels()).filter(levels -> !levels.isEmpty()))
|
||||
.map(List::size)
|
||||
.orElse(6);
|
||||
}
|
||||
for (LevelMetaData level : meta.levels()) {
|
||||
if (level.level() < bottommostLevel) {
|
||||
for (SstFileMetaData file : level.files()) {
|
||||
if (file.fileName().endsWith(".sst")) {
|
||||
files.add(file.fileName());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bottommostLevel = Math.max(bottommostLevel, databaseOptions.defaultColumnOptions().levels().size() - 1);
|
||||
|
||||
if (!files.isEmpty() && bottommostLevel != -1) {
|
||||
var partitionSize = files.size() / Runtime.getRuntime().availableProcessors();
|
||||
List<List<String>> partitions;
|
||||
if (partitionSize > 0) {
|
||||
partitions = partition(files, files.size() / Runtime.getRuntime().availableProcessors());
|
||||
} else {
|
||||
partitions = List.of(files);
|
||||
}
|
||||
int finalBottommostLevel = bottommostLevel;
|
||||
Mono.when(partitions.stream().map(partition -> Mono.<Void>fromCallable(() -> {
|
||||
logger.info("Compacting {} files in database {} in column family {} to level {}",
|
||||
partition.size(),
|
||||
name,
|
||||
new String(cfh.getName(), StandardCharsets.UTF_8),
|
||||
finalBottommostLevel
|
||||
);
|
||||
if (!partition.isEmpty()) {
|
||||
var coi = new CompactionJobInfo();
|
||||
db.compactFiles(co, cfh, partition, finalBottommostLevel, volumeId, coi);
|
||||
logger.info("Compacted {} files in database {} in column family {} to level {}: {}",
|
||||
partition.size(),
|
||||
name,
|
||||
new String(cfh.getName(), StandardCharsets.UTF_8),
|
||||
finalBottommostLevel,
|
||||
coi.status().getCodeString()
|
||||
);
|
||||
}
|
||||
return null;
|
||||
}).subscribeOn(Schedulers.boundedElastic())).toList()).block();
|
||||
}
|
||||
public List<String> getColumnFiles(Column column, boolean excludeLastLevel) {
|
||||
var cfh = handles.get(column);
|
||||
return RocksDBUtils.getColumnFiles(db, cfh, excludeLastLevel);
|
||||
}
|
||||
|
||||
public void forceCompaction(int volumeId) throws RocksDBException {
|
||||
for (var cfh : this.handles.values()) {
|
||||
RocksDBUtils.forceCompaction(db, name, cfh, volumeId, logger);
|
||||
}
|
||||
}
|
||||
|
||||
@ -660,14 +628,13 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
return name;
|
||||
}
|
||||
|
||||
public Lock getAccessibilityLock() {
|
||||
return shutdownLock.readLock();
|
||||
public StampedLock getCloseLock() {
|
||||
return closeLock;
|
||||
}
|
||||
|
||||
private void flushAndCloseDb(RocksDB db, Cache standardCache, Cache compressedCache, List<ColumnFamilyHandle> handles)
|
||||
throws RocksDBException {
|
||||
var shutdownWriteLock = shutdownLock.writeLock();
|
||||
shutdownWriteLock.lock();
|
||||
var closeWriteLock = closeLock.writeLock();
|
||||
try {
|
||||
if (closed) {
|
||||
return;
|
||||
@ -715,7 +682,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
shutdownWriteLock.unlock();
|
||||
closeLock.unlockWrite(closeWriteLock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1037,7 +1004,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
|
||||
private RocksDBColumn getRocksDBColumn(RocksDB db, ColumnFamilyHandle cfh) {
|
||||
var nettyDirect = databaseOptions.allowNettyDirect();
|
||||
var accessibilityLock = getAccessibilityLock();
|
||||
var closeLock = getCloseLock();
|
||||
if (db instanceof OptimisticTransactionDB optimisticTransactionDB) {
|
||||
return new OptimisticRocksDBColumn(optimisticTransactionDB,
|
||||
nettyDirect,
|
||||
@ -1045,7 +1012,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
name,
|
||||
cfh,
|
||||
meterRegistry,
|
||||
accessibilityLock
|
||||
closeLock
|
||||
);
|
||||
} else if (db instanceof TransactionDB transactionDB) {
|
||||
return new PessimisticRocksDBColumn(transactionDB,
|
||||
@ -1054,10 +1021,10 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
name,
|
||||
cfh,
|
||||
meterRegistry,
|
||||
accessibilityLock
|
||||
closeLock
|
||||
);
|
||||
} else {
|
||||
return new StandardRocksDBColumn(db, nettyDirect, allocator, name, cfh, meterRegistry, accessibilityLock);
|
||||
return new StandardRocksDBColumn(db, nettyDirect, allocator, name, cfh, meterRegistry, closeLock);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1225,8 +1192,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
public Mono<LLSnapshot> takeSnapshot() {
|
||||
return Mono
|
||||
.fromCallable(() -> snapshotTime.recordCallable(() -> {
|
||||
var shutdownReadLock = shutdownLock.readLock();
|
||||
shutdownReadLock.lock();
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
if (closed) {
|
||||
throw new IllegalStateException("Database closed");
|
||||
@ -1236,7 +1202,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
this.snapshotsHandles.put(currentSnapshotSequenceNumber, snapshot);
|
||||
return new LLSnapshot(currentSnapshotSequenceNumber);
|
||||
} finally {
|
||||
shutdownReadLock.unlock();
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
}))
|
||||
.subscribeOn(dbRScheduler);
|
||||
@ -1246,8 +1212,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
public Mono<Void> releaseSnapshot(LLSnapshot snapshot) {
|
||||
return Mono
|
||||
.<Void>fromCallable(() -> {
|
||||
var shutdownReadLock = shutdownLock.readLock();
|
||||
shutdownReadLock.lock();
|
||||
var closeReadLock = closeLock.readLock();
|
||||
try {
|
||||
if (closed) {
|
||||
throw new IllegalStateException("Database closed");
|
||||
@ -1262,7 +1227,7 @@ public class LLLocalKeyValueDatabase implements LLKeyValueDatabase {
|
||||
db.releaseSnapshot(dbSnapshot);
|
||||
return null;
|
||||
} finally {
|
||||
shutdownReadLock.unlock();
|
||||
closeLock.unlockRead(closeReadLock);
|
||||
}
|
||||
})
|
||||
.subscribeOn(dbRScheduler);
|
||||
|
@ -15,6 +15,7 @@ import java.io.IOException;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.LockSupport;
|
||||
import java.util.concurrent.locks.StampedLock;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.rocksdb.ColumnFamilyHandle;
|
||||
@ -39,8 +40,8 @@ public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<Optimis
|
||||
String databaseName,
|
||||
ColumnFamilyHandle cfh,
|
||||
MeterRegistry meterRegistry,
|
||||
Lock accessibilityLock) {
|
||||
super(db, nettyDirect, alloc, databaseName, cfh, meterRegistry, accessibilityLock);
|
||||
StampedLock closeLock) {
|
||||
super(db, nettyDirect, alloc, databaseName, cfh, meterRegistry, closeLock);
|
||||
this.optimisticAttempts = DistributionSummary
|
||||
.builder("db.optimistic.attempts.distribution")
|
||||
.publishPercentiles(0.2, 0.5, 0.95)
|
||||
|
@ -11,6 +11,7 @@ import it.cavallium.dbengine.database.LLDelta;
|
||||
import it.cavallium.dbengine.database.LLUtils;
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.StampedLock;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.rocksdb.ColumnFamilyHandle;
|
||||
@ -32,8 +33,8 @@ public final class PessimisticRocksDBColumn extends AbstractRocksDBColumn<Transa
|
||||
String dbName,
|
||||
ColumnFamilyHandle cfh,
|
||||
MeterRegistry meterRegistry,
|
||||
Lock accessibilityLock) {
|
||||
super(db, nettyDirect, alloc, dbName, cfh, meterRegistry, accessibilityLock);
|
||||
StampedLock closeLock) {
|
||||
super(db, nettyDirect, alloc, dbName, cfh, meterRegistry, closeLock);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -56,8 +56,7 @@ public sealed interface RocksDBColumn permits AbstractRocksDBColumn {
|
||||
|
||||
void put(@NotNull WriteOptions writeOptions, Buffer key, Buffer value) throws RocksDBException;
|
||||
|
||||
default void put(@NotNull WriteOptions writeOptions, byte[] key, byte[] value)
|
||||
throws RocksDBException {
|
||||
default void put(@NotNull WriteOptions writeOptions, byte[] key, byte[] value) throws RocksDBException {
|
||||
var allocator = getAllocator();
|
||||
try (var keyBuf = allocator.allocate(key.length)) {
|
||||
keyBuf.writeBytes(key);
|
||||
|
@ -0,0 +1,102 @@
|
||||
package it.cavallium.dbengine.database.disk;
|
||||
|
||||
import static com.google.common.collect.Lists.partition;
|
||||
|
||||
import it.cavallium.dbengine.rpc.current.data.Column;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.rocksdb.ColumnFamilyHandle;
|
||||
import org.rocksdb.CompactionJobInfo;
|
||||
import org.rocksdb.CompactionOptions;
|
||||
import org.rocksdb.CompressionType;
|
||||
import org.rocksdb.LevelMetaData;
|
||||
import org.rocksdb.RocksDB;
|
||||
import org.rocksdb.RocksDBException;
|
||||
import org.rocksdb.SstFileMetaData;
|
||||
import org.rocksdb.util.SizeUnit;
|
||||
import reactor.core.publisher.Mono;
|
||||
import reactor.core.scheduler.Schedulers;
|
||||
|
||||
public class RocksDBUtils {
|
||||
|
||||
public static int getLastLevel(RocksDB db, ColumnFamilyHandle cfh) {
|
||||
var lastLevel = db.numberLevels(cfh);
|
||||
if (lastLevel == 0) {
|
||||
return 6;
|
||||
} else {
|
||||
return lastLevel;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<String> getColumnFiles(RocksDB db, ColumnFamilyHandle cfh, boolean excludeLastLevel) {
|
||||
List<String> files = new ArrayList<>();
|
||||
var meta = db.getColumnFamilyMetaData(cfh);
|
||||
var lastLevel = excludeLastLevel ? getLastLevel(db, cfh) : -1;
|
||||
for (LevelMetaData level : meta.levels()) {
|
||||
if (!excludeLastLevel || level.level() < lastLevel) {
|
||||
for (SstFileMetaData file : level.files()) {
|
||||
if (file.fileName().endsWith(".sst")) {
|
||||
files.add(file.fileName());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return files;
|
||||
}
|
||||
|
||||
public static void forceCompaction(RocksDB db,
|
||||
String logDbName,
|
||||
ColumnFamilyHandle cfh,
|
||||
int volumeId,
|
||||
Logger logger) {
|
||||
try (var co = new CompactionOptions()
|
||||
.setCompression(CompressionType.LZ4_COMPRESSION)
|
||||
.setMaxSubcompactions(0)
|
||||
.setOutputFileSizeLimit(2 * SizeUnit.GB)) {
|
||||
List<String> filesToCompact = getColumnFiles(db, cfh, true);
|
||||
|
||||
if (!filesToCompact.isEmpty()) {
|
||||
var partitionSize = filesToCompact.size() / Runtime.getRuntime().availableProcessors();
|
||||
List<List<String>> partitions;
|
||||
if (partitionSize > 0) {
|
||||
partitions = partition(filesToCompact, partitionSize);
|
||||
} else {
|
||||
partitions = List.of(filesToCompact);
|
||||
}
|
||||
int finalBottommostLevel = getLastLevel(db, cfh);
|
||||
Mono.whenDelayError(partitions.stream().map(partition -> Mono.<Void>fromCallable(() -> {
|
||||
logger.info("Compacting {} files in database {} in column family {} to level {}",
|
||||
partition.size(),
|
||||
logDbName,
|
||||
new String(cfh.getName(), StandardCharsets.UTF_8),
|
||||
finalBottommostLevel
|
||||
);
|
||||
if (!partition.isEmpty()) {
|
||||
var coi = new CompactionJobInfo();
|
||||
try {
|
||||
db.compactFiles(co, cfh, partition, finalBottommostLevel, volumeId, coi);
|
||||
logger.info("Compacted {} files in database {} in column family {} to level {}: {}",
|
||||
partition.size(),
|
||||
logDbName,
|
||||
new String(cfh.getName(), StandardCharsets.UTF_8),
|
||||
finalBottommostLevel,
|
||||
coi.status().getCodeString()
|
||||
);
|
||||
} catch (Throwable ex) {
|
||||
logger.error("Failed to compact {} files in database {} in column family {} to level {}",
|
||||
partition.size(),
|
||||
logDbName,
|
||||
new String(cfh.getName(), StandardCharsets.UTF_8),
|
||||
finalBottommostLevel,
|
||||
ex
|
||||
);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}).subscribeOn(Schedulers.boundedElastic())).toList()).block();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -9,6 +9,7 @@ import it.cavallium.dbengine.database.LLDelta;
|
||||
import it.cavallium.dbengine.database.LLUtils;
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.StampedLock;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
import org.rocksdb.ColumnFamilyHandle;
|
||||
@ -23,8 +24,8 @@ public final class StandardRocksDBColumn extends AbstractRocksDBColumn<RocksDB>
|
||||
boolean nettyDirect,
|
||||
BufferAllocator alloc,
|
||||
String dbName,
|
||||
ColumnFamilyHandle cfh, MeterRegistry meterRegistry, Lock accessibilityLock) {
|
||||
super(db, nettyDirect, alloc, dbName, cfh, meterRegistry, accessibilityLock);
|
||||
ColumnFamilyHandle cfh, MeterRegistry meterRegistry, StampedLock closeLock) {
|
||||
super(db, nettyDirect, alloc, dbName, cfh, meterRegistry, closeLock);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
Loading…
Reference in New Issue
Block a user