2021-10-20 01:51:34 +02:00
|
|
|
package it.cavallium.dbengine.database.disk;
|
|
|
|
|
|
|
|
import static it.cavallium.dbengine.database.LLUtils.MARKER_ROCKSDB;
|
|
|
|
|
2022-03-30 18:36:07 +02:00
|
|
|
import io.micrometer.core.instrument.DistributionSummary;
|
2021-10-30 11:13:46 +02:00
|
|
|
import io.micrometer.core.instrument.MeterRegistry;
|
2023-02-09 23:34:25 +01:00
|
|
|
import it.cavallium.dbengine.buffers.Buf;
|
2021-10-20 01:51:34 +02:00
|
|
|
import it.cavallium.dbengine.database.LLDelta;
|
|
|
|
import it.cavallium.dbengine.database.LLUtils;
|
|
|
|
import it.cavallium.dbengine.lucene.ExponentialPageLimits;
|
2023-02-09 23:34:25 +01:00
|
|
|
import it.cavallium.dbengine.utils.DBException;
|
2021-10-20 01:51:34 +02:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.util.concurrent.ThreadLocalRandom;
|
2021-12-26 12:47:00 +01:00
|
|
|
import java.util.concurrent.locks.LockSupport;
|
2022-04-30 01:49:44 +02:00
|
|
|
import java.util.concurrent.locks.StampedLock;
|
2021-10-20 01:51:34 +02:00
|
|
|
import org.jetbrains.annotations.NotNull;
|
|
|
|
import org.rocksdb.ColumnFamilyHandle;
|
|
|
|
import org.rocksdb.OptimisticTransactionDB;
|
|
|
|
import org.rocksdb.ReadOptions;
|
|
|
|
import org.rocksdb.RocksDBException;
|
|
|
|
import org.rocksdb.Status.Code;
|
|
|
|
import org.rocksdb.Transaction;
|
2022-05-12 19:14:27 +02:00
|
|
|
import org.rocksdb.TransactionOptions;
|
2021-10-20 01:51:34 +02:00
|
|
|
import org.rocksdb.WriteBatch;
|
|
|
|
import org.rocksdb.WriteOptions;
|
|
|
|
|
|
|
|
public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<OptimisticTransactionDB> {
|
|
|
|
|
2021-10-30 11:13:46 +02:00
|
|
|
private static final boolean ALWAYS_PRINT_OPTIMISTIC_RETRIES = false;
|
|
|
|
|
2022-03-30 18:36:07 +02:00
|
|
|
private final DistributionSummary optimisticAttempts;
|
|
|
|
|
2021-10-20 01:51:34 +02:00
|
|
|
public OptimisticRocksDBColumn(OptimisticTransactionDB db,
|
2022-03-30 18:36:07 +02:00
|
|
|
String databaseName,
|
2022-05-20 10:20:00 +02:00
|
|
|
ColumnFamilyHandle cfh,
|
2022-04-28 23:23:26 +02:00
|
|
|
MeterRegistry meterRegistry,
|
2022-04-30 01:49:44 +02:00
|
|
|
StampedLock closeLock) {
|
2023-02-09 23:34:25 +01:00
|
|
|
super(db, databaseName, cfh, meterRegistry, closeLock);
|
2022-03-30 18:36:07 +02:00
|
|
|
this.optimisticAttempts = DistributionSummary
|
|
|
|
.builder("db.optimistic.attempts.distribution")
|
|
|
|
.publishPercentiles(0.2, 0.5, 0.95)
|
|
|
|
.baseUnit("times")
|
|
|
|
.scale(1)
|
|
|
|
.publishPercentileHistogram()
|
|
|
|
.tags("db.name", databaseName, "db.column", columnName)
|
|
|
|
.register(meterRegistry);
|
2021-10-20 01:51:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-05-20 10:20:00 +02:00
|
|
|
protected boolean commitOptimistically(Transaction tx) throws RocksDBException {
|
2021-10-20 01:51:34 +02:00
|
|
|
try {
|
2022-05-20 10:20:00 +02:00
|
|
|
tx.commit();
|
2021-10-20 01:51:34 +02:00
|
|
|
return true;
|
|
|
|
} catch (RocksDBException ex) {
|
|
|
|
var status = ex.getStatus() != null ? ex.getStatus().getCode() : Code.Ok;
|
|
|
|
if (status == Code.Busy || status == Code.TryAgain) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
throw ex;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-05-20 10:20:00 +02:00
|
|
|
protected Transaction beginTransaction(@NotNull WriteOptions writeOptions,
|
|
|
|
TransactionOptions txOpts) {
|
|
|
|
return getDb().beginTransaction(writeOptions);
|
2021-10-20 01:51:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-05-20 10:20:00 +02:00
|
|
|
public void write(WriteOptions writeOptions, WriteBatch writeBatch) throws RocksDBException {
|
|
|
|
getDb().write(writeOptions, writeBatch);
|
2021-10-20 01:51:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-05-20 10:20:00 +02:00
|
|
|
public @NotNull UpdateAtomicResult updateAtomicImpl(@NotNull ReadOptions readOptions,
|
|
|
|
@NotNull WriteOptions writeOptions,
|
2023-02-09 23:34:25 +01:00
|
|
|
Buf key,
|
2022-04-01 01:30:56 +02:00
|
|
|
BinarySerializationFunction updater,
|
2023-02-09 23:34:25 +01:00
|
|
|
UpdateAtomicResultMode returnMode) {
|
2022-04-01 01:30:56 +02:00
|
|
|
long initNanoTime = System.nanoTime();
|
|
|
|
try {
|
|
|
|
var cfh = getCfh();
|
2023-02-09 23:34:25 +01:00
|
|
|
var keyArray = LLUtils.asArray(key);
|
|
|
|
if (LLUtils.isInNonBlockingThread()) {
|
2022-04-01 01:30:56 +02:00
|
|
|
throw new UnsupportedOperationException("Called update in a nonblocking thread");
|
|
|
|
}
|
2022-05-20 10:20:00 +02:00
|
|
|
try (var txOpts = new TransactionOptions();
|
2022-05-12 19:14:27 +02:00
|
|
|
var tx = beginTransaction(writeOptions, txOpts)) {
|
2022-04-01 01:30:56 +02:00
|
|
|
boolean committedSuccessfully;
|
|
|
|
int retries = 0;
|
|
|
|
ExponentialPageLimits retryTime = null;
|
2023-02-09 23:34:25 +01:00
|
|
|
Buf prevData = null;
|
|
|
|
Buf newData = null;
|
|
|
|
boolean changed;
|
|
|
|
do {
|
|
|
|
var prevDataArray = tx.getForUpdate(readOptions, cfh, keyArray, true);
|
|
|
|
if (logger.isTraceEnabled()) {
|
|
|
|
logger.trace(MARKER_ROCKSDB,
|
|
|
|
"Reading {}: {} (before update)",
|
|
|
|
LLUtils.toStringSafe(key),
|
|
|
|
LLUtils.toStringSafe(prevDataArray)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
if (prevDataArray != null) {
|
|
|
|
prevData = Buf.wrap(prevDataArray);
|
|
|
|
prevDataArray = null;
|
|
|
|
} else {
|
|
|
|
prevData = null;
|
|
|
|
}
|
|
|
|
Buf prevDataToSendToUpdater;
|
|
|
|
if (prevData != null) {
|
|
|
|
prevDataToSendToUpdater = prevData.copy();
|
|
|
|
} else {
|
|
|
|
prevDataToSendToUpdater = null;
|
|
|
|
}
|
|
|
|
newData = updater.apply(prevDataToSendToUpdater);
|
|
|
|
var newDataArray = newData == null ? null : LLUtils.asArray(newData);
|
|
|
|
if (logger.isTraceEnabled()) {
|
|
|
|
logger.trace(MARKER_ROCKSDB,
|
|
|
|
"Updating {}. previous data: {}, updated data: {}",
|
|
|
|
LLUtils.toStringSafe(key),
|
|
|
|
LLUtils.toStringSafe(prevDataArray),
|
|
|
|
LLUtils.toStringSafe(newDataArray)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
if (prevData != null && newData == null) {
|
2022-05-20 23:59:56 +02:00
|
|
|
if (logger.isTraceEnabled()) {
|
2023-02-09 23:34:25 +01:00
|
|
|
logger.trace(MARKER_ROCKSDB, "Deleting {} (after update)", LLUtils.toStringSafe(key));
|
2022-05-20 23:59:56 +02:00
|
|
|
}
|
2023-02-09 23:34:25 +01:00
|
|
|
tx.delete(cfh, keyArray, true);
|
|
|
|
changed = true;
|
|
|
|
committedSuccessfully = commitOptimistically(tx);
|
|
|
|
} else if (newData != null && (prevData == null || !LLUtils.equals(prevData, newData))) {
|
2022-05-20 23:59:56 +02:00
|
|
|
if (logger.isTraceEnabled()) {
|
|
|
|
logger.trace(MARKER_ROCKSDB,
|
2023-02-09 23:34:25 +01:00
|
|
|
"Writing {}: {} (after update)",
|
2022-05-20 23:59:56 +02:00
|
|
|
LLUtils.toStringSafe(key),
|
2023-02-09 23:34:25 +01:00
|
|
|
LLUtils.toStringSafe(newData)
|
2022-05-20 23:59:56 +02:00
|
|
|
);
|
|
|
|
}
|
2023-02-09 23:34:25 +01:00
|
|
|
tx.put(cfh, keyArray, newDataArray);
|
|
|
|
changed = true;
|
|
|
|
committedSuccessfully = commitOptimistically(tx);
|
|
|
|
} else {
|
|
|
|
changed = false;
|
|
|
|
committedSuccessfully = true;
|
|
|
|
tx.rollback();
|
|
|
|
}
|
|
|
|
if (!committedSuccessfully) {
|
|
|
|
tx.undoGetForUpdate(cfh, keyArray);
|
|
|
|
tx.rollback();
|
|
|
|
retries++;
|
2021-10-30 11:13:46 +02:00
|
|
|
|
2023-02-09 23:34:25 +01:00
|
|
|
if (retries == 1) {
|
|
|
|
retryTime = new ExponentialPageLimits(0, 2, 2000);
|
|
|
|
}
|
|
|
|
long retryNs = 1000000L * retryTime.getPageLimit(retries);
|
2021-10-30 11:13:46 +02:00
|
|
|
|
2023-02-09 23:34:25 +01:00
|
|
|
// +- 30%
|
|
|
|
retryNs = retryNs + ThreadLocalRandom.current().nextLong(-retryNs * 30L / 100L, retryNs * 30L / 100L);
|
2021-10-30 11:13:46 +02:00
|
|
|
|
2023-02-09 23:34:25 +01:00
|
|
|
if (retries >= 5 && retries % 5 == 0 || ALWAYS_PRINT_OPTIMISTIC_RETRIES) {
|
|
|
|
logger.warn(MARKER_ROCKSDB, "Failed optimistic transaction {} (update):"
|
|
|
|
+ " waiting {} ms before retrying for the {} time", LLUtils.toStringSafe(key), retryNs / 1000000d, retries);
|
|
|
|
} else if (logger.isDebugEnabled(MARKER_ROCKSDB)) {
|
|
|
|
logger.debug(MARKER_ROCKSDB, "Failed optimistic transaction {} (update):"
|
|
|
|
+ " waiting {} ms before retrying for the {} time", LLUtils.toStringSafe(key), retryNs / 1000000d, retries);
|
2021-10-20 01:51:34 +02:00
|
|
|
}
|
2023-02-09 23:34:25 +01:00
|
|
|
// Wait for n milliseconds
|
|
|
|
if (retryNs > 0) {
|
|
|
|
LockSupport.parkNanos(retryNs);
|
2021-10-20 01:51:34 +02:00
|
|
|
}
|
2022-05-20 18:31:05 +02:00
|
|
|
}
|
2023-02-09 23:34:25 +01:00
|
|
|
} while (!committedSuccessfully);
|
|
|
|
if (retries > 5) {
|
|
|
|
logger.warn(MARKER_ROCKSDB, "Took {} retries to update key {}", retries, LLUtils.toStringSafe(key));
|
2022-05-20 23:59:56 +02:00
|
|
|
}
|
2023-02-09 23:34:25 +01:00
|
|
|
recordAtomicUpdateTime(changed, prevData != null, newData != null, initNanoTime);
|
|
|
|
optimisticAttempts.record(retries);
|
|
|
|
return switch (returnMode) {
|
|
|
|
case NOTHING -> RESULT_NOTHING;
|
|
|
|
case CURRENT -> new UpdateAtomicResultCurrent(newData);
|
|
|
|
case PREVIOUS -> new UpdateAtomicResultPrevious(prevData);
|
|
|
|
case BINARY_CHANGED -> new UpdateAtomicResultBinaryChanged(changed);
|
|
|
|
case DELTA -> new UpdateAtomicResultDelta(LLDelta.of(prevData, newData));
|
|
|
|
};
|
2021-10-20 01:51:34 +02:00
|
|
|
}
|
2023-02-09 23:34:25 +01:00
|
|
|
} catch (Exception ex) {
|
|
|
|
throw new DBException("Failed to update key " + LLUtils.toStringSafe(key), ex);
|
2021-10-20 01:51:34 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public boolean supportsTransactions() {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|