2020-12-07 22:15:18 +01:00
|
|
|
package it.cavallium.dbengine.database.disk;
|
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
import static io.netty.buffer.Unpooled.wrappedBuffer;
|
|
|
|
|
2021-04-30 19:15:04 +02:00
|
|
|
import io.netty.buffer.ByteBuf;
|
|
|
|
import io.netty.buffer.ByteBufAllocator;
|
|
|
|
import io.netty.util.ReferenceCounted;
|
2021-06-27 11:58:12 +02:00
|
|
|
import it.cavallium.dbengine.client.BadBlock;
|
2021-07-01 21:19:52 +02:00
|
|
|
import it.cavallium.dbengine.client.DatabaseOptions;
|
2021-08-16 10:27:47 +02:00
|
|
|
import it.cavallium.dbengine.database.Column;
|
2021-05-08 03:09:00 +02:00
|
|
|
import it.cavallium.dbengine.database.Delta;
|
2021-07-17 11:52:08 +02:00
|
|
|
import it.cavallium.dbengine.database.ExtraKeyOperationResult;
|
2021-01-17 18:31:25 +01:00
|
|
|
import it.cavallium.dbengine.database.LLDictionary;
|
|
|
|
import it.cavallium.dbengine.database.LLDictionaryResultType;
|
2021-08-28 22:42:51 +02:00
|
|
|
import it.cavallium.dbengine.database.LLEntry;
|
2021-01-30 00:24:55 +01:00
|
|
|
import it.cavallium.dbengine.database.LLRange;
|
2021-01-17 18:31:25 +01:00
|
|
|
import it.cavallium.dbengine.database.LLSnapshot;
|
|
|
|
import it.cavallium.dbengine.database.LLUtils;
|
2021-07-17 11:52:08 +02:00
|
|
|
import it.cavallium.dbengine.database.RepeatedElementList;
|
2021-02-13 01:31:24 +01:00
|
|
|
import it.cavallium.dbengine.database.UpdateMode;
|
2021-05-08 03:09:00 +02:00
|
|
|
import it.cavallium.dbengine.database.UpdateReturnMode;
|
2021-08-22 21:23:22 +02:00
|
|
|
import it.cavallium.dbengine.database.serialization.BiSerializationFunction;
|
|
|
|
import it.cavallium.dbengine.database.serialization.SerializationFunction;
|
2021-02-06 19:21:31 +01:00
|
|
|
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
2020-12-07 22:15:18 +01:00
|
|
|
import java.io.IOException;
|
2021-04-30 19:15:04 +02:00
|
|
|
import java.nio.ByteBuffer;
|
2021-07-17 11:52:08 +02:00
|
|
|
import java.time.Duration;
|
2021-02-01 02:21:53 +01:00
|
|
|
import java.util.ArrayList;
|
2020-12-07 22:15:18 +01:00
|
|
|
import java.util.Arrays;
|
2021-05-02 19:18:15 +02:00
|
|
|
import java.util.Collection;
|
2021-02-01 02:21:53 +01:00
|
|
|
import java.util.List;
|
2020-12-07 22:15:18 +01:00
|
|
|
import java.util.Map;
|
|
|
|
import java.util.Map.Entry;
|
|
|
|
import java.util.Objects;
|
2021-07-17 11:52:08 +02:00
|
|
|
import java.util.Optional;
|
2021-03-18 19:53:32 +01:00
|
|
|
import java.util.concurrent.Callable;
|
|
|
|
import java.util.concurrent.ForkJoinPool;
|
|
|
|
import java.util.concurrent.ForkJoinTask;
|
2021-02-13 00:18:57 +01:00
|
|
|
import java.util.concurrent.locks.StampedLock;
|
2021-07-17 11:52:08 +02:00
|
|
|
import java.util.function.BiFunction;
|
2020-12-07 22:15:18 +01:00
|
|
|
import java.util.function.Function;
|
2021-03-18 19:53:32 +01:00
|
|
|
import java.util.stream.Collectors;
|
|
|
|
import java.util.stream.IntStream;
|
|
|
|
import org.apache.commons.lang3.tuple.Pair;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.jetbrains.annotations.NotNull;
|
|
|
|
import org.jetbrains.annotations.Nullable;
|
2021-04-30 19:15:04 +02:00
|
|
|
import org.rocksdb.AbstractSlice;
|
|
|
|
import org.rocksdb.CappedWriteBatch;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.rocksdb.ColumnFamilyHandle;
|
2021-03-20 12:41:11 +01:00
|
|
|
import org.rocksdb.CompactRangeOptions;
|
2021-04-30 19:15:04 +02:00
|
|
|
import org.rocksdb.DirectSlice;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.rocksdb.FlushOptions;
|
|
|
|
import org.rocksdb.Holder;
|
|
|
|
import org.rocksdb.ReadOptions;
|
|
|
|
import org.rocksdb.RocksDB;
|
|
|
|
import org.rocksdb.RocksDBException;
|
|
|
|
import org.rocksdb.RocksIterator;
|
2021-03-13 19:01:36 +01:00
|
|
|
import org.rocksdb.Slice;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.rocksdb.Snapshot;
|
2021-05-02 19:18:15 +02:00
|
|
|
import org.rocksdb.WriteBatch;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.rocksdb.WriteOptions;
|
|
|
|
import org.warp.commonutils.concurrency.atomicity.NotAtomic;
|
2021-02-06 19:21:31 +01:00
|
|
|
import org.warp.commonutils.locks.Striped;
|
2021-02-26 14:06:16 +01:00
|
|
|
import org.warp.commonutils.log.Logger;
|
|
|
|
import org.warp.commonutils.log.LoggerFactory;
|
2021-01-30 00:24:55 +01:00
|
|
|
import reactor.core.publisher.Flux;
|
|
|
|
import reactor.core.publisher.Mono;
|
2021-02-01 02:21:53 +01:00
|
|
|
import reactor.core.scheduler.Scheduler;
|
2021-07-17 11:52:08 +02:00
|
|
|
import reactor.util.function.Tuple2;
|
2021-04-03 19:09:06 +02:00
|
|
|
import reactor.util.function.Tuple3;
|
|
|
|
import reactor.util.function.Tuples;
|
2020-12-07 22:15:18 +01:00
|
|
|
|
|
|
|
@NotAtomic
|
|
|
|
public class LLLocalDictionary implements LLDictionary {
|
|
|
|
|
2021-02-02 15:36:11 +01:00
|
|
|
protected static final Logger logger = LoggerFactory.getLogger(LLLocalDictionary.class);
|
2021-03-18 19:53:32 +01:00
|
|
|
private static final boolean USE_CURRENT_FASTSIZE_FOR_OLD_SNAPSHOTS = false;
|
2020-12-07 22:15:18 +01:00
|
|
|
static final int RESERVED_WRITE_BATCH_SIZE = 2 * 1024 * 1024; // 2MiB
|
|
|
|
static final long MAX_WRITE_BATCH_SIZE = 1024L * 1024L * 1024L; // 1GiB
|
|
|
|
static final int CAPPED_WRITE_BATCH_CAP = 50000; // 50K operations
|
2021-02-01 02:21:53 +01:00
|
|
|
static final int MULTI_GET_WINDOW = 500;
|
2021-07-17 11:52:08 +02:00
|
|
|
static final Duration MULTI_GET_WINDOW_TIMEOUT = Duration.ofSeconds(1);
|
2021-08-22 18:20:05 +02:00
|
|
|
static final ReadOptions EMPTY_READ_OPTIONS = new UnreleasableReadOptions(new UnmodifiableReadOptions());
|
|
|
|
static final WriteOptions EMPTY_WRITE_OPTIONS = new UnreleasableWriteOptions(new UnmodifiableWriteOptions());
|
|
|
|
static final WriteOptions BATCH_WRITE_OPTIONS = new UnreleasableWriteOptions(new UnmodifiableWriteOptions());
|
2021-03-14 19:38:20 +01:00
|
|
|
static final boolean PREFER_SEEK_TO_FIRST = false;
|
2021-06-27 11:58:12 +02:00
|
|
|
/**
|
2021-08-16 10:36:54 +02:00
|
|
|
* It used to be false,
|
|
|
|
* now it's true to avoid crashes during iterations on completely corrupted files
|
2021-06-27 11:58:12 +02:00
|
|
|
*/
|
|
|
|
static final boolean VERIFY_CHECKSUMS_WHEN_NOT_NEEDED = true;
|
2021-05-03 02:45:29 +02:00
|
|
|
/**
|
|
|
|
* Default: true. Use false to debug problems with windowing.
|
|
|
|
*/
|
|
|
|
static final boolean USE_WINDOW_IN_SET_RANGE = true;
|
2021-05-02 19:18:15 +02:00
|
|
|
/**
|
|
|
|
* Default: true. Use false to debug problems with write batches.
|
|
|
|
*/
|
2021-05-03 12:29:15 +02:00
|
|
|
static final boolean USE_WRITE_BATCHES_IN_PUT_MULTI = true;
|
|
|
|
/**
|
|
|
|
* Default: true. Use false to debug problems with write batches.
|
|
|
|
*/
|
|
|
|
static final boolean USE_WRITE_BATCHES_IN_SET_RANGE = true;
|
2021-05-02 19:18:15 +02:00
|
|
|
/**
|
|
|
|
* Default: true. Use false to debug problems with capped write batches.
|
|
|
|
*/
|
2021-05-03 02:45:29 +02:00
|
|
|
static final boolean USE_CAPPED_WRITE_BATCH_IN_SET_RANGE = true;
|
2021-05-03 12:29:15 +02:00
|
|
|
/**
|
|
|
|
* Default: true. Use false to debug problems with write batches deletes.
|
|
|
|
*/
|
|
|
|
static final boolean USE_WRITE_BATCH_IN_SET_RANGE_DELETE = false;
|
2021-03-18 19:53:32 +01:00
|
|
|
static final boolean PARALLEL_EXACT_SIZE = true;
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-02-15 00:15:42 +01:00
|
|
|
private static final int STRIPES = 512;
|
2021-01-30 00:24:55 +01:00
|
|
|
private static final byte[] FIRST_KEY = new byte[]{};
|
2020-12-07 22:15:18 +01:00
|
|
|
private static final byte[] NO_DATA = new byte[0];
|
2021-04-30 19:15:04 +02:00
|
|
|
|
|
|
|
private static final boolean ASSERTIONS_ENABLED;
|
2021-05-03 00:29:26 +02:00
|
|
|
/**
|
|
|
|
* Default: true
|
|
|
|
*/
|
|
|
|
private static final boolean USE_DIRECT_BUFFER_BOUNDS = true;
|
2021-05-05 00:07:18 +02:00
|
|
|
private static final int INITIAL_DIRECT_READ_BYTE_BUF_SIZE_BYTES = 4096;
|
2021-04-30 19:15:04 +02:00
|
|
|
|
2021-06-27 11:58:12 +02:00
|
|
|
/**
|
|
|
|
* 1KiB dummy buffer, write only, used for debugging purposes
|
|
|
|
*/
|
|
|
|
private static final ByteBuffer DUMMY_WRITE_ONLY_BYTE_BUFFER = ByteBuffer.allocateDirect(1024);
|
|
|
|
|
2021-04-30 19:15:04 +02:00
|
|
|
static {
|
|
|
|
boolean assertionsEnabled = false;
|
|
|
|
//noinspection AssertWithSideEffects
|
|
|
|
assert (assertionsEnabled = true);
|
|
|
|
//noinspection ConstantConditions
|
|
|
|
ASSERTIONS_ENABLED = assertionsEnabled;
|
|
|
|
}
|
|
|
|
|
2020-12-07 22:15:18 +01:00
|
|
|
private final RocksDB db;
|
|
|
|
private final ColumnFamilyHandle cfh;
|
|
|
|
private final String databaseName;
|
2021-06-26 02:35:33 +02:00
|
|
|
private final String columnName;
|
2021-02-01 02:21:53 +01:00
|
|
|
private final Scheduler dbScheduler;
|
2020-12-07 22:15:18 +01:00
|
|
|
private final Function<LLSnapshot, Snapshot> snapshotResolver;
|
2021-02-13 00:18:57 +01:00
|
|
|
private final Striped<StampedLock> itemsLock = Striped.readWriteStampedLock(STRIPES);
|
2021-02-13 01:31:24 +01:00
|
|
|
private final UpdateMode updateMode;
|
2021-04-30 19:15:04 +02:00
|
|
|
private final ByteBufAllocator alloc;
|
2021-05-28 16:04:59 +02:00
|
|
|
private final String getRangeMultiDebugName;
|
|
|
|
private final String getRangeKeysMultiDebugName;
|
2021-06-29 23:31:02 +02:00
|
|
|
private final DatabaseOptions databaseOptions;
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-05-03 21:41:51 +02:00
|
|
|
public LLLocalDictionary(
|
|
|
|
ByteBufAllocator allocator,
|
|
|
|
@NotNull RocksDB db,
|
2020-12-07 22:15:18 +01:00
|
|
|
@NotNull ColumnFamilyHandle columnFamilyHandle,
|
|
|
|
String databaseName,
|
2021-06-26 02:35:33 +02:00
|
|
|
String columnName,
|
2021-02-01 02:21:53 +01:00
|
|
|
Scheduler dbScheduler,
|
2021-02-13 01:31:24 +01:00
|
|
|
Function<LLSnapshot, Snapshot> snapshotResolver,
|
2021-06-29 23:31:02 +02:00
|
|
|
UpdateMode updateMode,
|
|
|
|
DatabaseOptions databaseOptions) {
|
2020-12-07 22:15:18 +01:00
|
|
|
Objects.requireNonNull(db);
|
|
|
|
this.db = db;
|
|
|
|
Objects.requireNonNull(columnFamilyHandle);
|
|
|
|
this.cfh = columnFamilyHandle;
|
|
|
|
this.databaseName = databaseName;
|
2021-06-26 02:35:33 +02:00
|
|
|
this.columnName = columnName;
|
2021-02-01 02:21:53 +01:00
|
|
|
this.dbScheduler = dbScheduler;
|
2020-12-07 22:15:18 +01:00
|
|
|
this.snapshotResolver = snapshotResolver;
|
2021-02-13 01:31:24 +01:00
|
|
|
this.updateMode = updateMode;
|
2021-06-26 02:35:33 +02:00
|
|
|
this.getRangeMultiDebugName = databaseName + "(" + columnName + ")" + "::getRangeMulti";
|
|
|
|
this.getRangeKeysMultiDebugName = databaseName + "(" + columnName + ")" + "::getRangeKeysMulti";
|
2021-06-29 23:31:02 +02:00
|
|
|
this.databaseOptions = databaseOptions;
|
2021-05-03 21:41:51 +02:00
|
|
|
alloc = allocator;
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getDatabaseName() {
|
|
|
|
return databaseName;
|
|
|
|
}
|
|
|
|
|
2021-06-26 02:35:33 +02:00
|
|
|
public String getColumnName() {
|
|
|
|
return columnName;
|
|
|
|
}
|
|
|
|
|
2021-06-19 21:55:20 +02:00
|
|
|
/**
|
2021-08-16 10:36:54 +02:00
|
|
|
* Please don't modify the returned ReadOptions!
|
|
|
|
* If you want to modify it, wrap it into a new ReadOptions!
|
2021-06-19 21:55:20 +02:00
|
|
|
*/
|
2020-12-07 22:15:18 +01:00
|
|
|
private ReadOptions resolveSnapshot(LLSnapshot snapshot) {
|
|
|
|
if (snapshot != null) {
|
|
|
|
return getReadOptions(snapshotResolver.apply(snapshot));
|
|
|
|
} else {
|
|
|
|
return EMPTY_READ_OPTIONS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-19 21:55:20 +02:00
|
|
|
/**
|
2021-08-16 10:36:54 +02:00
|
|
|
* Please don't modify the returned ReadOptions!
|
|
|
|
* If you want to modify it, wrap it into a new ReadOptions!
|
2021-06-19 21:55:20 +02:00
|
|
|
*/
|
2020-12-07 22:15:18 +01:00
|
|
|
private ReadOptions getReadOptions(Snapshot snapshot) {
|
|
|
|
if (snapshot != null) {
|
|
|
|
return new ReadOptions().setSnapshot(snapshot);
|
|
|
|
} else {
|
|
|
|
return EMPTY_READ_OPTIONS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-30 19:15:04 +02:00
|
|
|
private int getLockIndex(ByteBuf key) {
|
2021-05-08 03:09:00 +02:00
|
|
|
return Math.abs(LLUtils.hashCode(key) % STRIPES);
|
2021-02-06 19:21:31 +01:00
|
|
|
}
|
|
|
|
|
2021-04-30 19:15:04 +02:00
|
|
|
private IntArrayList getLockIndices(List<ByteBuf> keys) {
|
2021-02-06 19:21:31 +01:00
|
|
|
var list = new IntArrayList(keys.size());
|
2021-04-30 19:15:04 +02:00
|
|
|
for (ByteBuf key : keys) {
|
2021-02-06 19:21:31 +01:00
|
|
|
list.add(getLockIndex(key));
|
|
|
|
}
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2021-08-28 22:42:51 +02:00
|
|
|
private IntArrayList getLockIndicesEntries(List<LLEntry> keys) {
|
2021-02-06 19:21:31 +01:00
|
|
|
var list = new IntArrayList(keys.size());
|
2021-08-28 22:42:51 +02:00
|
|
|
for (LLEntry key : keys) {
|
2021-02-06 19:21:31 +01:00
|
|
|
list.add(getLockIndex(key.getKey()));
|
|
|
|
}
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2021-07-17 11:52:08 +02:00
|
|
|
private <X> IntArrayList getLockIndicesWithExtra(List<Tuple2<ByteBuf, X>> entries) {
|
|
|
|
var list = new IntArrayList(entries.size());
|
|
|
|
for (Tuple2<ByteBuf, X> key : entries) {
|
|
|
|
list.add(getLockIndex(key.getT1()));
|
|
|
|
}
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2020-12-07 22:15:18 +01:00
|
|
|
@Override
|
2021-04-30 19:15:04 +02:00
|
|
|
public ByteBufAllocator getAllocator() {
|
|
|
|
return alloc;
|
|
|
|
}
|
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
private <T> Mono<T> runOnDb(Callable<@Nullable T> callable) {
|
|
|
|
return Mono.fromCallable(callable).subscribeOn(dbScheduler);
|
|
|
|
}
|
2021-05-12 01:25:59 +02:00
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
@Override
|
2021-08-16 10:36:54 +02:00
|
|
|
public Mono<ByteBuf> get(@Nullable LLSnapshot snapshot,
|
|
|
|
Mono<ByteBuf> keyMono,
|
|
|
|
boolean existsAlmostCertainly) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Mono.usingWhen(keyMono,
|
|
|
|
key -> runOnDb(() -> {
|
|
|
|
StampedLock lock;
|
|
|
|
long stamp;
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock = itemsLock.getAt(getLockIndex(key));
|
|
|
|
|
|
|
|
stamp = lock.readLock();
|
|
|
|
} else {
|
|
|
|
lock = null;
|
|
|
|
stamp = 0;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
if (logger.isTraceEnabled()) {
|
|
|
|
logger.trace("Reading {}", LLUtils.toStringSafe(key));
|
2021-02-13 01:31:24 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
return dbGet(cfh, resolveSnapshot(snapshot), key.retain(), existsAlmostCertainly);
|
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock.unlockRead(stamp);
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
2021-08-16 10:36:54 +02:00
|
|
|
}).onErrorMap(cause -> new IOException("Failed to read "
|
|
|
|
+ LLUtils.toStringSafe(key), cause)),
|
2021-08-16 10:27:47 +02:00
|
|
|
key -> Mono.fromRunnable(key::release)
|
|
|
|
);
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
|
|
|
|
2021-05-03 21:41:51 +02:00
|
|
|
private ByteBuf dbGet(ColumnFamilyHandle cfh,
|
|
|
|
@Nullable ReadOptions readOptions,
|
|
|
|
ByteBuf key,
|
|
|
|
boolean existsAlmostCertainly) throws RocksDBException {
|
2021-04-30 19:15:04 +02:00
|
|
|
try {
|
2021-06-29 23:31:02 +02:00
|
|
|
if (databaseOptions.allowNettyDirect() && key.isDirect()) {
|
2021-05-02 19:18:15 +02:00
|
|
|
|
2021-05-03 21:41:51 +02:00
|
|
|
//todo: implement keyMayExist if existsAlmostCertainly is false.
|
|
|
|
// Unfortunately it's not feasible until RocksDB implements keyMayExist with buffers
|
2021-04-30 19:15:04 +02:00
|
|
|
|
2021-05-03 21:41:51 +02:00
|
|
|
// Create the key nio buffer to pass to RocksDB
|
|
|
|
if (!key.isDirect()) {
|
|
|
|
throw new RocksDBException("Key buffer must be direct");
|
|
|
|
}
|
|
|
|
ByteBuffer keyNioBuffer = LLUtils.toDirect(key);
|
2021-08-28 22:42:51 +02:00
|
|
|
assert keyNioBuffer.isDirect();
|
2021-05-03 21:41:51 +02:00
|
|
|
// Create a direct result buffer because RocksDB works only with direct buffers
|
2021-08-16 10:36:54 +02:00
|
|
|
ByteBuf resultBuf = alloc.directBuffer(INITIAL_DIRECT_READ_BYTE_BUF_SIZE_BYTES);
|
2021-05-03 21:41:51 +02:00
|
|
|
try {
|
|
|
|
int valueSize;
|
|
|
|
int assertionReadData = -1;
|
|
|
|
ByteBuffer resultNioBuf;
|
|
|
|
do {
|
|
|
|
// Create the result nio buffer to pass to RocksDB
|
|
|
|
resultNioBuf = resultBuf.nioBuffer(0, resultBuf.capacity());
|
2021-08-28 22:42:51 +02:00
|
|
|
assert keyNioBuffer.isDirect();
|
|
|
|
assert resultNioBuf.isDirect();
|
2021-05-03 21:41:51 +02:00
|
|
|
valueSize = db.get(cfh,
|
|
|
|
Objects.requireNonNullElse(readOptions, EMPTY_READ_OPTIONS),
|
2021-05-08 03:09:00 +02:00
|
|
|
keyNioBuffer.position(0),
|
2021-05-03 21:41:51 +02:00
|
|
|
resultNioBuf
|
|
|
|
);
|
|
|
|
if (valueSize != RocksDB.NOT_FOUND) {
|
2021-08-28 22:42:51 +02:00
|
|
|
if (ASSERTIONS_ENABLED) {
|
2021-07-18 19:37:24 +02:00
|
|
|
// todo: check if position is equal to data that have been read
|
|
|
|
// todo: check if limit is equal to value size or data that have been read
|
|
|
|
assert valueSize <= 0 || resultNioBuf.limit() > 0;
|
|
|
|
|
2021-08-16 10:36:54 +02:00
|
|
|
// If the locking is enabled the data is safe, so since we are appending data
|
|
|
|
// to the end, we need to check if it has been appended correctly or it
|
|
|
|
// has been overwritten.
|
|
|
|
// We must not do this check otherwise because if there is no locking the data
|
|
|
|
// can be overwritten with a smaller value the next time.
|
2021-07-18 19:37:24 +02:00
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
// Check if read data is larger than previously read data.
|
2021-08-16 10:36:54 +02:00
|
|
|
// If it's smaller or equals it means that RocksDB is overwriting
|
|
|
|
// the beginning of the result buffer.
|
2021-07-18 19:37:24 +02:00
|
|
|
assert resultNioBuf.limit() > assertionReadData;
|
|
|
|
if (ASSERTIONS_ENABLED) {
|
|
|
|
assertionReadData = resultNioBuf.limit();
|
|
|
|
}
|
2021-05-03 21:41:51 +02:00
|
|
|
}
|
|
|
|
|
2021-07-18 19:37:24 +02:00
|
|
|
// Check if read data is not bigger than the total value size.
|
2021-08-16 10:36:54 +02:00
|
|
|
// If it's bigger it means that RocksDB is writing the start
|
|
|
|
// of the result into the result buffer more than once.
|
2021-07-18 19:37:24 +02:00
|
|
|
assert resultNioBuf.limit() <= valueSize;
|
|
|
|
}
|
2021-05-03 21:41:51 +02:00
|
|
|
|
|
|
|
if (valueSize <= resultNioBuf.limit()) {
|
|
|
|
// Return the result ready to be read
|
|
|
|
return resultBuf.setIndex(0, valueSize).retain();
|
|
|
|
} else {
|
|
|
|
// If the locking is enabled the data is safe, so we can append the next read data.
|
|
|
|
// Otherwise we need to re-read everything.
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
// Update the resultBuf writerIndex with the new position
|
|
|
|
resultBuf.writerIndex(resultNioBuf.limit());
|
|
|
|
}
|
|
|
|
//noinspection UnusedAssignment
|
|
|
|
resultNioBuf = null;
|
|
|
|
}
|
|
|
|
// Rewind the keyNioBuf position, making it readable again for the next loop iteration
|
|
|
|
keyNioBuffer.rewind();
|
|
|
|
if (resultBuf.capacity() < valueSize) {
|
2021-08-16 10:36:54 +02:00
|
|
|
// Expand the resultBuf size if the result is bigger than the current result
|
|
|
|
// buffer size
|
2021-05-03 21:41:51 +02:00
|
|
|
resultBuf.capacity(valueSize);
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
|
|
|
}
|
2021-05-03 21:41:51 +02:00
|
|
|
// Repeat if the result has been found but it's still not finished
|
|
|
|
} while (valueSize != RocksDB.NOT_FOUND);
|
|
|
|
// If the value is not found return null
|
|
|
|
return null;
|
|
|
|
} finally {
|
|
|
|
resultBuf.release();
|
|
|
|
}
|
|
|
|
} else {
|
2021-08-16 10:36:54 +02:00
|
|
|
ReadOptions validReadOptions = Objects.requireNonNullElse(readOptions, EMPTY_READ_OPTIONS);
|
2021-05-03 21:41:51 +02:00
|
|
|
byte[] keyArray = LLUtils.toArray(key);
|
|
|
|
Objects.requireNonNull(keyArray);
|
|
|
|
Holder<byte[]> data = existsAlmostCertainly ? null : new Holder<>();
|
|
|
|
if (existsAlmostCertainly || db.keyMayExist(cfh,
|
2021-08-16 10:36:54 +02:00
|
|
|
validReadOptions,
|
2021-05-03 21:41:51 +02:00
|
|
|
keyArray,
|
|
|
|
data
|
|
|
|
)) {
|
|
|
|
if (!existsAlmostCertainly && data.getValue() != null) {
|
|
|
|
return wrappedBuffer(data.getValue());
|
|
|
|
} else {
|
2021-08-16 10:36:54 +02:00
|
|
|
byte[] result = db.get(cfh, validReadOptions, keyArray);
|
2021-05-03 21:41:51 +02:00
|
|
|
if (result == null) {
|
|
|
|
return null;
|
|
|
|
} else {
|
|
|
|
return wrappedBuffer(result);
|
2021-05-02 19:18:15 +02:00
|
|
|
}
|
|
|
|
}
|
2021-05-03 21:41:51 +02:00
|
|
|
} else {
|
|
|
|
return null;
|
|
|
|
}
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
key.release();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
@SuppressWarnings("SameParameterValue")
|
2021-08-16 10:36:54 +02:00
|
|
|
private void dbPut(ColumnFamilyHandle cfh,
|
|
|
|
@Nullable WriteOptions writeOptions,
|
|
|
|
ByteBuf key,
|
|
|
|
ByteBuf value) throws RocksDBException {
|
2021-04-30 19:15:04 +02:00
|
|
|
try {
|
2021-08-16 10:36:54 +02:00
|
|
|
WriteOptions validWriteOptions = Objects.requireNonNullElse(writeOptions, EMPTY_WRITE_OPTIONS);
|
2021-06-29 23:31:02 +02:00
|
|
|
if (databaseOptions.allowNettyDirect() && key.isDirect() && value.isDirect()) {
|
2021-05-03 21:41:51 +02:00
|
|
|
if (!key.isDirect()) {
|
|
|
|
throw new RocksDBException("Key buffer must be direct");
|
|
|
|
}
|
|
|
|
if (!value.isDirect()) {
|
|
|
|
throw new RocksDBException("Value buffer must be direct");
|
|
|
|
}
|
2021-07-18 19:37:24 +02:00
|
|
|
var keyNioBuffer = LLUtils.toDirect(key);
|
2021-08-28 22:42:51 +02:00
|
|
|
assert keyNioBuffer.isDirect();
|
2021-04-30 19:15:04 +02:00
|
|
|
|
|
|
|
|
2021-07-18 19:37:24 +02:00
|
|
|
var valueNioBuffer = LLUtils.toDirect(value);
|
2021-08-28 22:42:51 +02:00
|
|
|
assert valueNioBuffer.isDirect();
|
2021-08-16 10:36:54 +02:00
|
|
|
db.put(cfh, validWriteOptions, keyNioBuffer, valueNioBuffer);
|
2021-05-03 21:41:51 +02:00
|
|
|
} else {
|
2021-08-16 10:36:54 +02:00
|
|
|
db.put(cfh, validWriteOptions, LLUtils.toArray(key), LLUtils.toArray(value));
|
2021-05-03 21:41:51 +02:00
|
|
|
}
|
2021-04-30 19:15:04 +02:00
|
|
|
} finally {
|
|
|
|
key.release();
|
|
|
|
value.release();
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Mono<Boolean> isRangeEmpty(@Nullable LLSnapshot snapshot, Mono<LLRange> rangeMono) {
|
|
|
|
return Mono.usingWhen(rangeMono,
|
|
|
|
range -> {
|
|
|
|
if (range.isSingle()) {
|
|
|
|
return this.containsKey(snapshot, Mono.just(range.getSingle()).map(ByteBuf::retain));
|
|
|
|
} else {
|
|
|
|
return this.containsRange(snapshot, Mono.just(range).map(LLRange::retain));
|
|
|
|
}
|
|
|
|
},
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
).map(isContained -> !isContained);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
public Mono<Boolean> containsRange(@Nullable LLSnapshot snapshot, Mono<LLRange> rangeMono) {
|
|
|
|
return Mono.usingWhen(rangeMono,
|
|
|
|
range -> runOnDb(() -> {
|
|
|
|
try (var readOpts = new ReadOptions(resolveSnapshot(snapshot))) {
|
|
|
|
readOpts.setVerifyChecksums(VERIFY_CHECKSUMS_WHEN_NOT_NEEDED);
|
|
|
|
readOpts.setFillCache(false);
|
|
|
|
if (range.hasMin()) {
|
|
|
|
if (databaseOptions.allowNettyDirect() && range.getMin().isDirect()) {
|
2021-08-16 10:36:54 +02:00
|
|
|
readOpts.setIterateLowerBound(new DirectSlice(Objects
|
|
|
|
.requireNonNull(LLUtils.toDirect(range.getMin()),
|
|
|
|
"This range must use direct buffers")));
|
2021-08-16 10:27:47 +02:00
|
|
|
} else {
|
|
|
|
readOpts.setIterateLowerBound(new Slice(LLUtils.toArray(range.getMin())));
|
2021-02-06 19:21:31 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
if (range.hasMax()) {
|
|
|
|
if (databaseOptions.allowNettyDirect() && range.getMax().isDirect()) {
|
2021-08-16 10:36:54 +02:00
|
|
|
readOpts.setIterateUpperBound(new DirectSlice(Objects
|
|
|
|
.requireNonNull(LLUtils.toDirect(range.getMax()),
|
2021-08-16 10:27:47 +02:00
|
|
|
"This range must use direct buffers"
|
|
|
|
)));
|
|
|
|
} else {
|
|
|
|
readOpts.setIterateUpperBound(new Slice(LLUtils.toArray(range.getMax())));
|
2021-06-19 21:55:20 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
try (RocksIterator rocksIterator = db.newIterator(cfh, readOpts)) {
|
|
|
|
if (!LLLocalDictionary.PREFER_SEEK_TO_FIRST && range.hasMin()) {
|
|
|
|
if (databaseOptions.allowNettyDirect() && range.getMin().isDirect()) {
|
|
|
|
rocksIterator.seek(Objects.requireNonNull(LLUtils.toDirect(range.getMin()),
|
|
|
|
"This range must use direct buffers"
|
|
|
|
));
|
2021-06-19 21:55:20 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
rocksIterator.seek(LLUtils.toArray(range.getMin()));
|
2021-06-19 21:55:20 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} else {
|
|
|
|
rocksIterator.seekToFirst();
|
2021-06-19 21:55:20 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
rocksIterator.status();
|
|
|
|
return rocksIterator.isValid();
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
}).onErrorMap(cause -> new IOException("Failed to read range " + range.toString(), cause)),
|
|
|
|
range -> Mono.fromRunnable(range::release));
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
private Mono<Boolean> containsKey(@Nullable LLSnapshot snapshot, Mono<ByteBuf> keyMono) {
|
|
|
|
return Mono.usingWhen(keyMono,
|
|
|
|
key -> runOnDb(() -> {
|
2021-05-12 01:25:59 +02:00
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
StampedLock lock;
|
|
|
|
long stamp;
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock = itemsLock.getAt(getLockIndex(key));
|
|
|
|
|
|
|
|
stamp = lock.readLock();
|
|
|
|
} else {
|
|
|
|
lock = null;
|
|
|
|
stamp = 0;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
int size = RocksDB.NOT_FOUND;
|
|
|
|
byte[] keyBytes = LLUtils.toArray(key);
|
|
|
|
Holder<byte[]> data = new Holder<>();
|
|
|
|
var unmodifiableReadOpts = resolveSnapshot(snapshot);
|
|
|
|
if (db.keyMayExist(cfh, unmodifiableReadOpts, keyBytes, data)) {
|
|
|
|
if (data.getValue() != null) {
|
|
|
|
size = data.getValue().length;
|
|
|
|
} else {
|
|
|
|
size = db.get(cfh, unmodifiableReadOpts, keyBytes, NO_DATA);
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
return size != RocksDB.NOT_FOUND;
|
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock.unlockRead(stamp);
|
|
|
|
}
|
|
|
|
}
|
2021-08-16 10:36:54 +02:00
|
|
|
}).onErrorMap(cause -> new IOException("Failed to read "
|
|
|
|
+ LLUtils.toStringSafe(key), cause)),
|
2021-08-16 10:27:47 +02:00
|
|
|
key -> Mono.fromRunnable(key::release)
|
|
|
|
);
|
2021-01-30 00:24:55 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-08-16 10:36:54 +02:00
|
|
|
public Mono<ByteBuf> put(Mono<ByteBuf> keyMono,
|
|
|
|
Mono<ByteBuf> valueMono,
|
|
|
|
LLDictionaryResultType resultType) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Mono.usingWhen(keyMono,
|
|
|
|
key -> this
|
|
|
|
.getPreviousData(Mono.just(key).map(ByteBuf::retain), resultType)
|
|
|
|
.concatWith(Mono.usingWhen(valueMono,
|
|
|
|
value -> this.<ByteBuf>runOnDb(() -> {
|
|
|
|
StampedLock lock;
|
|
|
|
long stamp;
|
2021-05-12 01:25:59 +02:00
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
2021-08-16 10:27:47 +02:00
|
|
|
lock = itemsLock.getAt(getLockIndex(key));
|
|
|
|
|
|
|
|
stamp = lock.writeLock();
|
|
|
|
} else {
|
|
|
|
lock = null;
|
|
|
|
stamp = 0;
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
try {
|
|
|
|
if (logger.isTraceEnabled()) {
|
2021-08-16 10:36:54 +02:00
|
|
|
logger.trace("Writing {}: {}",
|
|
|
|
LLUtils.toStringSafe(key), LLUtils.toStringSafe(value));
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
dbPut(cfh, null, key.retain(), value.retain());
|
|
|
|
return null;
|
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock.unlockWrite(stamp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}),
|
|
|
|
value -> Mono.fromRunnable(value::release)
|
2021-08-16 10:36:54 +02:00
|
|
|
).onErrorMap(cause -> new IOException("Failed to write "
|
|
|
|
+ LLUtils.toStringSafe(key), cause)))
|
2021-08-16 10:27:47 +02:00
|
|
|
.singleOrEmpty(),
|
|
|
|
key -> Mono.fromRunnable(key::release)
|
|
|
|
);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
2021-05-02 19:18:15 +02:00
|
|
|
@Override
|
|
|
|
public Mono<UpdateMode> getUpdateMode() {
|
|
|
|
return Mono.fromSupplier(() -> updateMode);
|
|
|
|
}
|
|
|
|
|
2021-05-08 03:09:00 +02:00
|
|
|
// Remember to change also updateAndGetDelta() if you are modifying this function
|
|
|
|
@SuppressWarnings("DuplicatedCode")
|
2021-02-06 19:21:31 +01:00
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Mono<ByteBuf> update(Mono<ByteBuf> keyMono,
|
2021-08-22 21:23:22 +02:00
|
|
|
SerializationFunction<@Nullable ByteBuf, @Nullable ByteBuf> updater,
|
2021-05-08 03:09:00 +02:00
|
|
|
UpdateReturnMode updateReturnMode,
|
|
|
|
boolean existsAlmostCertainly) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Mono.usingWhen(keyMono,
|
|
|
|
key -> runOnDb(() -> {
|
|
|
|
if (updateMode == UpdateMode.DISALLOW) {
|
|
|
|
throw new UnsupportedOperationException("update() is disallowed");
|
|
|
|
}
|
|
|
|
StampedLock lock;
|
|
|
|
long stamp;
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock = itemsLock.getAt(getLockIndex(key));
|
2021-05-08 03:09:00 +02:00
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
stamp = lock.readLock();
|
|
|
|
} else {
|
|
|
|
lock = null;
|
|
|
|
stamp = 0;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
if (logger.isTraceEnabled()) {
|
|
|
|
logger.trace("Reading {}", LLUtils.toStringSafe(key));
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
while (true) {
|
|
|
|
@Nullable ByteBuf prevData;
|
|
|
|
var prevDataHolder = existsAlmostCertainly ? null : new Holder<byte[]>();
|
2021-08-16 10:36:54 +02:00
|
|
|
if (existsAlmostCertainly
|
|
|
|
|| db.keyMayExist(cfh, LLUtils.toArray(key), prevDataHolder)) {
|
2021-08-16 10:27:47 +02:00
|
|
|
if (!existsAlmostCertainly && prevDataHolder.getValue() != null) {
|
|
|
|
byte @Nullable [] prevDataBytes = prevDataHolder.getValue();
|
|
|
|
if (prevDataBytes != null) {
|
|
|
|
prevData = wrappedBuffer(prevDataBytes);
|
2021-05-08 03:09:00 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
prevData = null;
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
prevData = dbGet(cfh, null, key.retain(), existsAlmostCertainly);
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} else {
|
|
|
|
prevData = null;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
@Nullable ByteBuf newData;
|
2021-08-16 10:36:54 +02:00
|
|
|
ByteBuf prevDataToSendToUpdater = prevData == null
|
|
|
|
? null
|
|
|
|
: prevData.retainedSlice();
|
2021-05-08 03:09:00 +02:00
|
|
|
try {
|
2021-08-16 10:36:54 +02:00
|
|
|
newData = updater.apply(prevDataToSendToUpdater == null
|
|
|
|
? null
|
|
|
|
: prevDataToSendToUpdater.retain());
|
2021-08-16 10:27:47 +02:00
|
|
|
if (!(prevDataToSendToUpdater == null
|
|
|
|
|| prevDataToSendToUpdater.readerIndex() == 0
|
|
|
|
|| !prevDataToSendToUpdater.isReadable())) {
|
|
|
|
throw new IllegalStateException("The updater has read the previous data partially"
|
|
|
|
+ " (read bytes: " + prevDataToSendToUpdater.readerIndex()
|
|
|
|
+ " unread bytes: " + prevDataToSendToUpdater.readableBytes() + ")."
|
|
|
|
+ " The only allowed options are reading the data fully or not reading it at all");
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
if (prevDataToSendToUpdater != null) {
|
|
|
|
prevDataToSendToUpdater.release();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
if (prevData != null && newData == null) {
|
|
|
|
//noinspection DuplicatedCode
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
var ws = lock.tryConvertToWriteLock(stamp);
|
|
|
|
if (ws != 0) {
|
|
|
|
stamp = ws;
|
|
|
|
} else {
|
|
|
|
lock.unlockRead(stamp);
|
2021-05-08 03:09:00 +02:00
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
stamp = lock.writeLock();
|
|
|
|
continue;
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
if (logger.isTraceEnabled()) {
|
|
|
|
logger.trace("Deleting {}", LLUtils.toStringSafe(key));
|
|
|
|
}
|
|
|
|
dbDelete(cfh, null, key.retain());
|
|
|
|
} else if (newData != null
|
|
|
|
&& (prevData == null || !LLUtils.equals(prevData, newData))) {
|
|
|
|
//noinspection DuplicatedCode
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
var ws = lock.tryConvertToWriteLock(stamp);
|
|
|
|
if (ws != 0) {
|
|
|
|
stamp = ws;
|
|
|
|
} else {
|
|
|
|
lock.unlockRead(stamp);
|
2021-05-08 03:09:00 +02:00
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
stamp = lock.writeLock();
|
|
|
|
continue;
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
if (logger.isTraceEnabled()) {
|
2021-08-16 10:36:54 +02:00
|
|
|
logger.trace("Writing {}: {}",
|
|
|
|
LLUtils.toStringSafe(key), LLUtils.toStringSafe(newData));
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
dbPut(cfh, null, key.retain(), newData.retain());
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
return switch (updateReturnMode) {
|
|
|
|
case GET_NEW_VALUE -> newData != null ? newData.retain() : null;
|
|
|
|
case GET_OLD_VALUE -> prevData != null ? prevData.retain() : null;
|
|
|
|
case NOTHING -> null;
|
|
|
|
//noinspection UnnecessaryDefault
|
|
|
|
default -> throw new IllegalArgumentException();
|
|
|
|
};
|
2021-05-08 03:09:00 +02:00
|
|
|
} finally {
|
2021-08-16 10:27:47 +02:00
|
|
|
if (newData != null) {
|
|
|
|
newData.release();
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
if (prevData != null) {
|
|
|
|
prevData.release();
|
|
|
|
}
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock.unlock(stamp);
|
|
|
|
}
|
|
|
|
}
|
2021-08-16 10:36:54 +02:00
|
|
|
}).onErrorMap(cause -> new IOException("Failed to read or write "
|
|
|
|
+ LLUtils.toStringSafe(key), cause)),
|
2021-08-16 10:27:47 +02:00
|
|
|
key -> Mono.fromRunnable(key::release)
|
|
|
|
);
|
2021-05-08 03:09:00 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Remember to change also update() if you are modifying this function
|
|
|
|
@SuppressWarnings("DuplicatedCode")
|
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Mono<Delta<ByteBuf>> updateAndGetDelta(Mono<ByteBuf> keyMono,
|
2021-08-22 21:23:22 +02:00
|
|
|
SerializationFunction<@Nullable ByteBuf, @Nullable ByteBuf> updater,
|
2021-03-18 16:19:41 +01:00
|
|
|
boolean existsAlmostCertainly) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Mono.usingWhen(keyMono,
|
|
|
|
key -> this.runOnDb(() -> {
|
2021-08-16 10:36:54 +02:00
|
|
|
if (updateMode == UpdateMode.DISALLOW) {
|
|
|
|
throw new UnsupportedOperationException("update() is disallowed");
|
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
StampedLock lock;
|
|
|
|
long stamp;
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock = itemsLock.getAt(getLockIndex(key));
|
|
|
|
|
|
|
|
stamp = lock.readLock();
|
|
|
|
} else {
|
|
|
|
lock = null;
|
|
|
|
stamp = 0;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
if (logger.isTraceEnabled()) {
|
|
|
|
logger.trace("Reading {}", LLUtils.toStringSafe(key));
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
while (true) {
|
|
|
|
@Nullable ByteBuf prevData;
|
|
|
|
var prevDataHolder = existsAlmostCertainly ? null : new Holder<byte[]>();
|
2021-08-16 10:36:54 +02:00
|
|
|
if (existsAlmostCertainly
|
|
|
|
|| db.keyMayExist(cfh, LLUtils.toArray(key), prevDataHolder)) {
|
2021-08-16 10:27:47 +02:00
|
|
|
if (!existsAlmostCertainly && prevDataHolder.getValue() != null) {
|
|
|
|
byte @Nullable [] prevDataBytes = prevDataHolder.getValue();
|
|
|
|
if (prevDataBytes != null) {
|
|
|
|
prevData = wrappedBuffer(prevDataBytes);
|
2021-04-30 19:15:04 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
prevData = null;
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
prevData = dbGet(cfh, null, key.retain(), existsAlmostCertainly);
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} else {
|
|
|
|
prevData = null;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
@Nullable ByteBuf newData;
|
2021-08-16 10:36:54 +02:00
|
|
|
ByteBuf prevDataToSendToUpdater = prevData == null
|
|
|
|
? null
|
|
|
|
: prevData.retainedSlice();
|
2021-04-30 19:15:04 +02:00
|
|
|
try {
|
2021-08-16 10:36:54 +02:00
|
|
|
newData = updater.apply(prevDataToSendToUpdater == null
|
|
|
|
? null
|
|
|
|
: prevDataToSendToUpdater.retain());
|
2021-08-28 22:42:51 +02:00
|
|
|
assert prevDataToSendToUpdater == null
|
2021-08-16 10:27:47 +02:00
|
|
|
|| prevDataToSendToUpdater.readerIndex() == 0
|
|
|
|
|| !prevDataToSendToUpdater.isReadable();
|
|
|
|
} finally {
|
|
|
|
if (prevDataToSendToUpdater != null) {
|
|
|
|
prevDataToSendToUpdater.release();
|
2021-02-06 19:21:31 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
try {
|
|
|
|
if (prevData != null && newData == null) {
|
|
|
|
//noinspection DuplicatedCode
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
var ws = lock.tryConvertToWriteLock(stamp);
|
|
|
|
if (ws != 0) {
|
|
|
|
stamp = ws;
|
|
|
|
} else {
|
|
|
|
lock.unlockRead(stamp);
|
2021-02-13 02:16:24 +01:00
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
stamp = lock.writeLock();
|
|
|
|
continue;
|
2021-02-13 01:31:24 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
if (logger.isTraceEnabled()) {
|
|
|
|
logger.trace("Deleting {}", LLUtils.toStringSafe(key));
|
|
|
|
}
|
|
|
|
dbDelete(cfh, null, key.retain());
|
|
|
|
} else if (newData != null
|
|
|
|
&& (prevData == null || !LLUtils.equals(prevData, newData))) {
|
|
|
|
//noinspection DuplicatedCode
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
var ws = lock.tryConvertToWriteLock(stamp);
|
|
|
|
if (ws != 0) {
|
|
|
|
stamp = ws;
|
|
|
|
} else {
|
|
|
|
lock.unlockRead(stamp);
|
2021-02-13 02:16:24 +01:00
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
stamp = lock.writeLock();
|
|
|
|
continue;
|
2021-02-13 01:31:24 +01:00
|
|
|
}
|
2021-02-13 00:18:57 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
if (logger.isTraceEnabled()) {
|
2021-08-16 10:36:54 +02:00
|
|
|
logger.trace("Writing {}: {}",
|
|
|
|
LLUtils.toStringSafe(key), LLUtils.toStringSafe(newData));
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
dbPut(cfh, null, key.retain(), newData.retain());
|
2021-02-06 19:21:31 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
return new Delta<>(
|
|
|
|
prevData != null ? prevData.retain() : null,
|
|
|
|
newData != null ? newData.retain() : null
|
|
|
|
);
|
2021-04-30 19:15:04 +02:00
|
|
|
} finally {
|
2021-08-16 10:27:47 +02:00
|
|
|
if (newData != null) {
|
|
|
|
newData.release();
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-02-06 19:21:31 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
if (prevData != null) {
|
|
|
|
prevData.release();
|
|
|
|
}
|
2021-02-06 19:21:31 +01:00
|
|
|
}
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock.unlock(stamp);
|
|
|
|
}
|
|
|
|
}
|
2021-08-16 10:36:54 +02:00
|
|
|
}).onErrorMap(cause -> new IOException("Failed to read or write "
|
|
|
|
+ LLUtils.toStringSafe(key), cause)),
|
2021-08-16 10:27:47 +02:00
|
|
|
key -> Mono.fromRunnable(key::release)
|
|
|
|
);
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
private void dbDelete(ColumnFamilyHandle cfh, @Nullable WriteOptions writeOptions, ByteBuf key)
|
|
|
|
throws RocksDBException {
|
|
|
|
try {
|
2021-08-16 10:36:54 +02:00
|
|
|
var validWriteOptions = Objects.requireNonNullElse(writeOptions, EMPTY_WRITE_OPTIONS);
|
2021-06-29 23:31:02 +02:00
|
|
|
if (databaseOptions.allowNettyDirect() && key.isDirect()) {
|
2021-05-03 21:41:51 +02:00
|
|
|
if (!key.isDirect()) {
|
|
|
|
throw new IllegalArgumentException("Key must be a direct buffer");
|
|
|
|
}
|
|
|
|
var keyNioBuffer = LLUtils.toDirect(key);
|
2021-08-16 10:36:54 +02:00
|
|
|
db.delete(cfh, validWriteOptions, keyNioBuffer);
|
2021-05-03 21:41:51 +02:00
|
|
|
} else {
|
2021-08-16 10:36:54 +02:00
|
|
|
db.delete(cfh, validWriteOptions, LLUtils.toArray(key));
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
key.release();
|
|
|
|
}
|
2021-02-06 19:21:31 +01:00
|
|
|
}
|
|
|
|
|
2020-12-07 22:15:18 +01:00
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Mono<ByteBuf> remove(Mono<ByteBuf> keyMono, LLDictionaryResultType resultType) {
|
|
|
|
return Mono.usingWhen(keyMono,
|
|
|
|
key -> this
|
|
|
|
.getPreviousData(Mono.just(key).map(ByteBuf::retain), resultType)
|
|
|
|
.concatWith(this
|
|
|
|
.<ByteBuf>runOnDb(() -> {
|
2021-07-17 11:52:08 +02:00
|
|
|
StampedLock lock;
|
|
|
|
long stamp;
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock = itemsLock.getAt(getLockIndex(key));
|
2021-08-16 10:27:47 +02:00
|
|
|
|
|
|
|
stamp = lock.writeLock();
|
2021-07-17 11:52:08 +02:00
|
|
|
} else {
|
|
|
|
lock = null;
|
|
|
|
stamp = 0;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
if (logger.isTraceEnabled()) {
|
2021-08-16 10:27:47 +02:00
|
|
|
logger.trace("Deleting {}", LLUtils.toStringSafe(key));
|
2021-07-17 11:52:08 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
dbDelete(cfh, null, key.retain());
|
|
|
|
return null;
|
2021-07-17 11:52:08 +02:00
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
2021-08-16 10:27:47 +02:00
|
|
|
lock.unlockWrite(stamp);
|
2021-07-17 11:52:08 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
2021-08-16 10:36:54 +02:00
|
|
|
.onErrorMap(cause -> new IOException("Failed to delete "
|
|
|
|
+ LLUtils.toStringSafe(key), cause))
|
2021-08-16 10:27:47 +02:00
|
|
|
)
|
|
|
|
.singleOrEmpty(),
|
|
|
|
key -> Mono.fromCallable(key::release));
|
2021-01-30 00:24:55 +01:00
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
private Mono<ByteBuf> getPreviousData(Mono<ByteBuf> keyMono, LLDictionaryResultType resultType) {
|
|
|
|
return Mono
|
|
|
|
.usingWhen(keyMono,
|
|
|
|
key -> switch (resultType) {
|
|
|
|
case PREVIOUS_VALUE_EXISTENCE -> this
|
|
|
|
.containsKey(null, Mono.just(key).map(ByteBuf::retain))
|
|
|
|
.single()
|
|
|
|
.map(LLUtils::booleanToResponseByteBuffer)
|
|
|
|
.doAfterTerminate(() -> {
|
2021-08-28 22:42:51 +02:00
|
|
|
assert key.refCnt() > 0;
|
2021-08-16 10:27:47 +02:00
|
|
|
});
|
|
|
|
case PREVIOUS_VALUE -> Mono
|
|
|
|
.fromCallable(() -> {
|
|
|
|
StampedLock lock;
|
|
|
|
long stamp;
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock = itemsLock.getAt(getLockIndex(key));
|
|
|
|
|
|
|
|
stamp = lock.readLock();
|
|
|
|
} else {
|
|
|
|
lock = null;
|
|
|
|
stamp = 0;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
if (logger.isTraceEnabled()) {
|
|
|
|
logger.trace("Reading {}", LLUtils.toArray(key));
|
|
|
|
}
|
|
|
|
var data = new Holder<byte[]>();
|
|
|
|
if (db.keyMayExist(cfh, LLUtils.toArray(key), data)) {
|
|
|
|
if (data.getValue() != null) {
|
|
|
|
return wrappedBuffer(data.getValue());
|
|
|
|
} else {
|
|
|
|
try {
|
|
|
|
return dbGet(cfh, null, key.retain(), true);
|
|
|
|
} finally {
|
2021-08-28 22:42:51 +02:00
|
|
|
assert key.refCnt() > 0;
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
lock.unlockRead(stamp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.onErrorMap(cause -> new IOException("Failed to read " + LLUtils.toStringSafe(key), cause))
|
|
|
|
.subscribeOn(dbScheduler);
|
|
|
|
case VOID -> Mono.empty();
|
|
|
|
},
|
|
|
|
key -> Mono.fromRunnable(key::release)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public <K> Flux<Tuple3<K, ByteBuf, Optional<ByteBuf>>> getMulti(@Nullable LLSnapshot snapshot,
|
|
|
|
Flux<Tuple2<K, ByteBuf>> keys,
|
|
|
|
boolean existsAlmostCertainly) {
|
|
|
|
return keys
|
|
|
|
.transform(normal -> new BufferTimeOutPublisher<>(normal, MULTI_GET_WINDOW, MULTI_GET_WINDOW_TIMEOUT))
|
2021-07-17 11:52:08 +02:00
|
|
|
.doOnDiscard(Tuple2.class, discardedEntry -> {
|
|
|
|
//noinspection unchecked
|
|
|
|
var entry = (Tuple2<K, ByteBuf>) discardedEntry;
|
|
|
|
entry.getT2().release();
|
|
|
|
})
|
|
|
|
.doOnDiscard(Tuple3.class, discardedEntry -> {
|
|
|
|
//noinspection unchecked
|
|
|
|
var entry = (Tuple3<K, ByteBuf, ByteBuf>) discardedEntry;
|
|
|
|
entry.getT2().release();
|
|
|
|
entry.getT3().release();
|
|
|
|
})
|
|
|
|
.flatMapSequential(keysWindow -> {
|
|
|
|
List<ByteBuf> keyBufsWindow = new ArrayList<>(keysWindow.size());
|
|
|
|
for (Tuple2<K, ByteBuf> objects : keysWindow) {
|
|
|
|
keyBufsWindow.add(objects.getT2());
|
|
|
|
}
|
|
|
|
return Mono
|
2021-07-31 18:00:53 +02:00
|
|
|
.fromCallable(() -> {
|
|
|
|
Iterable<StampedLock> locks;
|
|
|
|
ArrayList<Long> stamps;
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
locks = itemsLock.bulkGetAt(getLockIndices(keyBufsWindow));
|
|
|
|
stamps = new ArrayList<>();
|
|
|
|
for (var lock : locks) {
|
2021-07-17 11:52:08 +02:00
|
|
|
|
2021-07-31 18:00:53 +02:00
|
|
|
stamps.add(lock.readLock());
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
locks = null;
|
|
|
|
stamps = null;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
var columnFamilyHandles = new RepeatedElementList<>(cfh, keysWindow.size());
|
|
|
|
var results = db.multiGetAsList(resolveSnapshot(snapshot), columnFamilyHandles, LLUtils.toArray(keyBufsWindow));
|
|
|
|
var mappedResults = new ArrayList<Tuple3<K, ByteBuf, Optional<ByteBuf>>>(results.size());
|
|
|
|
for (int i = 0; i < results.size(); i++) {
|
|
|
|
byte[] val = results.get(i);
|
|
|
|
Optional<ByteBuf> valueOpt;
|
|
|
|
if (val != null) {
|
|
|
|
results.set(i, null);
|
|
|
|
valueOpt = Optional.of(wrappedBuffer(val));
|
2021-07-17 11:52:08 +02:00
|
|
|
} else {
|
2021-07-31 18:00:53 +02:00
|
|
|
valueOpt = Optional.empty();
|
2021-02-01 02:21:53 +01:00
|
|
|
}
|
2021-07-31 18:00:53 +02:00
|
|
|
mappedResults.add(Tuples.of(keysWindow.get(i).getT1(),
|
|
|
|
keyBufsWindow.get(i).retain(),
|
|
|
|
valueOpt
|
|
|
|
));
|
|
|
|
}
|
|
|
|
return mappedResults;
|
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
int index = 0;
|
|
|
|
for (var lock : locks) {
|
|
|
|
lock.unlockRead(stamps.get(index));
|
|
|
|
index++;
|
2021-07-17 11:52:08 +02:00
|
|
|
}
|
2021-07-31 18:00:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.subscribeOn(dbScheduler)
|
|
|
|
.flatMapIterable(list -> list)
|
|
|
|
.onErrorMap(cause -> new IOException("Failed to read keys "
|
|
|
|
+ Arrays.deepToString(keyBufsWindow.toArray(ByteBuf[]::new)), cause))
|
|
|
|
.doAfterTerminate(() -> keyBufsWindow.forEach(ReferenceCounted::release));
|
2021-07-17 11:52:08 +02:00
|
|
|
}, 2) // Max concurrency is 2 to read data while preparing the next segment
|
2021-05-02 19:18:15 +02:00
|
|
|
.doOnDiscard(Entry.class, discardedEntry -> {
|
2021-08-28 22:42:51 +02:00
|
|
|
var entry = (LLEntry) discardedEntry;
|
2021-05-02 19:18:15 +02:00
|
|
|
entry.getKey().release();
|
|
|
|
entry.getValue().release();
|
2021-07-17 11:52:08 +02:00
|
|
|
})
|
|
|
|
.doOnDiscard(Tuple3.class, discardedEntry -> {
|
|
|
|
//noinspection unchecked
|
2021-07-23 15:20:33 +02:00
|
|
|
var entry = (Tuple3<K, ByteBuf, Optional<ByteBuf>>) discardedEntry;
|
2021-07-17 11:52:08 +02:00
|
|
|
entry.getT2().release();
|
2021-07-23 15:20:33 +02:00
|
|
|
entry.getT3().ifPresent(ReferenceCounted::release);
|
2021-05-02 19:18:15 +02:00
|
|
|
});
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
2021-02-01 02:21:53 +01:00
|
|
|
@Override
|
2021-08-28 22:42:51 +02:00
|
|
|
public Flux<LLEntry> putMulti(Flux<LLEntry> entries, boolean getOldValues) {
|
2021-02-01 02:21:53 +01:00
|
|
|
return entries
|
2021-07-17 11:52:08 +02:00
|
|
|
.buffer(Math.min(MULTI_GET_WINDOW, CAPPED_WRITE_BATCH_CAP))
|
|
|
|
.flatMapSequential(ew -> Mono
|
2021-05-02 19:18:15 +02:00
|
|
|
.using(
|
|
|
|
() -> ew,
|
|
|
|
entriesWindow -> Mono
|
2021-08-28 22:42:51 +02:00
|
|
|
.<LLEntry>fromCallable(() -> {
|
2021-05-02 19:18:15 +02:00
|
|
|
Iterable<StampedLock> locks;
|
|
|
|
ArrayList<Long> stamps;
|
2021-04-30 19:15:04 +02:00
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
2021-05-02 19:18:15 +02:00
|
|
|
locks = itemsLock.bulkGetAt(getLockIndicesEntries(entriesWindow));
|
|
|
|
stamps = new ArrayList<>();
|
2021-04-30 19:15:04 +02:00
|
|
|
for (var lock : locks) {
|
2021-05-02 19:18:15 +02:00
|
|
|
stamps.add(lock.writeLock());
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-05-02 19:18:15 +02:00
|
|
|
} else {
|
|
|
|
locks = null;
|
|
|
|
stamps = null;
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-05-02 19:18:15 +02:00
|
|
|
try {
|
2021-05-03 12:29:15 +02:00
|
|
|
if (USE_WRITE_BATCHES_IN_PUT_MULTI) {
|
2021-05-02 19:18:15 +02:00
|
|
|
var batch = new CappedWriteBatch(db,
|
|
|
|
CAPPED_WRITE_BATCH_CAP,
|
|
|
|
RESERVED_WRITE_BATCH_SIZE,
|
|
|
|
MAX_WRITE_BATCH_SIZE,
|
|
|
|
BATCH_WRITE_OPTIONS
|
|
|
|
);
|
2021-08-28 22:42:51 +02:00
|
|
|
for (LLEntry entry : entriesWindow) {
|
|
|
|
var k = entry.getKey().retain();
|
|
|
|
var v = entry.getValue().retain();
|
|
|
|
batch.put(cfh, k, v);
|
2021-05-02 19:18:15 +02:00
|
|
|
}
|
|
|
|
batch.writeToDbAndClose();
|
|
|
|
batch.close();
|
|
|
|
} else {
|
2021-08-28 22:42:51 +02:00
|
|
|
for (LLEntry entry : entriesWindow) {
|
2021-05-03 12:44:22 +02:00
|
|
|
db.put(cfh, EMPTY_WRITE_OPTIONS, entry.getKey().nioBuffer(), entry.getValue().nioBuffer());
|
2021-05-02 19:18:15 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return null;
|
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
int index = 0;
|
|
|
|
for (var lock : locks) {
|
|
|
|
lock.unlockWrite(stamps.get(index));
|
|
|
|
index++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
2021-05-11 21:59:05 +02:00
|
|
|
.subscribeOn(dbScheduler)
|
2021-05-02 19:18:15 +02:00
|
|
|
|
|
|
|
// Prepend everything to get previous elements
|
2021-07-17 11:52:08 +02:00
|
|
|
.transform(transformer -> {
|
|
|
|
var obj = new Object();
|
2021-05-02 19:18:15 +02:00
|
|
|
if (getOldValues) {
|
|
|
|
return this
|
|
|
|
.getMulti(null, Flux
|
|
|
|
.fromIterable(entriesWindow)
|
2021-08-28 22:42:51 +02:00
|
|
|
.map(entry -> entry.getKey().retain())
|
2021-07-17 11:52:08 +02:00
|
|
|
.map(buf -> Tuples.of(obj, buf)), false)
|
2021-05-02 19:18:15 +02:00
|
|
|
.publishOn(dbScheduler)
|
|
|
|
.then(transformer);
|
|
|
|
} else {
|
|
|
|
return transformer;
|
|
|
|
}
|
|
|
|
}),
|
|
|
|
entriesWindow -> {
|
2021-08-28 22:42:51 +02:00
|
|
|
for (LLEntry entry : entriesWindow) {
|
|
|
|
entry.release();
|
2021-05-02 19:18:15 +02:00
|
|
|
}
|
|
|
|
}
|
2021-07-17 11:52:08 +02:00
|
|
|
), 2) // Max concurrency is 2 to read data while preparing the next segment
|
2021-08-22 23:50:50 +02:00
|
|
|
.transform(LLUtils::handleDiscard);
|
2021-02-01 02:21:53 +01:00
|
|
|
}
|
|
|
|
|
2021-07-17 11:52:08 +02:00
|
|
|
@Override
|
|
|
|
public <X> Flux<ExtraKeyOperationResult<ByteBuf, X>> updateMulti(Flux<Tuple2<ByteBuf, X>> entries,
|
2021-08-22 21:23:22 +02:00
|
|
|
BiSerializationFunction<ByteBuf, X, ByteBuf> updateFunction) {
|
2021-07-17 11:52:08 +02:00
|
|
|
return entries
|
|
|
|
.buffer(Math.min(MULTI_GET_WINDOW, CAPPED_WRITE_BATCH_CAP))
|
|
|
|
.flatMapSequential(ew -> Flux
|
|
|
|
.using(
|
|
|
|
() -> ew,
|
|
|
|
entriesWindow -> {
|
|
|
|
List<ByteBuf> keyBufsWindow = new ArrayList<>(entriesWindow.size());
|
|
|
|
for (Tuple2<ByteBuf, X> objects : entriesWindow) {
|
|
|
|
keyBufsWindow.add(objects.getT1());
|
|
|
|
}
|
|
|
|
return Mono
|
|
|
|
.<Iterable<ExtraKeyOperationResult<ByteBuf, X>>>fromCallable(() -> {
|
|
|
|
Iterable<StampedLock> locks;
|
|
|
|
ArrayList<Long> stamps;
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
locks = itemsLock.bulkGetAt(getLockIndicesWithExtra(entriesWindow));
|
|
|
|
stamps = new ArrayList<>();
|
|
|
|
for (var lock : locks) {
|
|
|
|
stamps.add(lock.writeLock());
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
locks = null;
|
|
|
|
stamps = null;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
var columnFamilyHandles = new RepeatedElementList<>(cfh, entriesWindow.size());
|
|
|
|
ArrayList<Tuple3<ByteBuf, X, Optional<ByteBuf>>> mappedInputs;
|
|
|
|
{
|
|
|
|
var inputs = db.multiGetAsList(resolveSnapshot(null), columnFamilyHandles, LLUtils.toArray(keyBufsWindow));
|
|
|
|
mappedInputs = new ArrayList<>(inputs.size());
|
|
|
|
for (int i = 0; i < inputs.size(); i++) {
|
|
|
|
var val = inputs.get(i);
|
|
|
|
if (val != null) {
|
|
|
|
inputs.set(i, null);
|
|
|
|
mappedInputs.add(Tuples.of(
|
|
|
|
keyBufsWindow.get(i).retain(),
|
|
|
|
entriesWindow.get(i).getT2(),
|
|
|
|
Optional.of(wrappedBuffer(val))
|
|
|
|
));
|
|
|
|
} else {
|
|
|
|
mappedInputs.add(Tuples.of(
|
|
|
|
keyBufsWindow.get(i).retain(),
|
|
|
|
entriesWindow.get(i).getT2(),
|
|
|
|
Optional.empty()
|
|
|
|
));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
var updatedValuesToWrite = new ArrayList<ByteBuf>(mappedInputs.size());
|
|
|
|
var valueChangedResult = new ArrayList<ExtraKeyOperationResult<ByteBuf, X>>(mappedInputs.size());
|
|
|
|
try {
|
|
|
|
for (var mappedInput : mappedInputs) {
|
2021-08-27 02:49:51 +02:00
|
|
|
//noinspection BlockingMethodInNonBlockingContext
|
2021-07-17 11:52:08 +02:00
|
|
|
var updatedValue = updateFunction.apply(mappedInput.getT1().retain(), mappedInput.getT2());
|
|
|
|
valueChangedResult.add(new ExtraKeyOperationResult<>(mappedInput.getT1(),
|
|
|
|
mappedInput.getT2(),
|
|
|
|
!Objects.equals(mappedInput.getT3().orElse(null), updatedValue.retain())
|
|
|
|
));
|
|
|
|
updatedValuesToWrite.add(updatedValue);
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
for (var mappedInput : mappedInputs) {
|
|
|
|
mappedInput.getT3().ifPresent(ReferenceCounted::release);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (USE_WRITE_BATCHES_IN_PUT_MULTI) {
|
|
|
|
var batch = new CappedWriteBatch(db,
|
|
|
|
CAPPED_WRITE_BATCH_CAP,
|
|
|
|
RESERVED_WRITE_BATCH_SIZE,
|
|
|
|
MAX_WRITE_BATCH_SIZE,
|
|
|
|
BATCH_WRITE_OPTIONS
|
|
|
|
);
|
|
|
|
int i = 0;
|
|
|
|
for (Tuple2<ByteBuf, X> entry : entriesWindow) {
|
|
|
|
var valueToWrite = updatedValuesToWrite.get(i);
|
|
|
|
if (valueToWrite == null) {
|
|
|
|
batch.delete(cfh, entry.getT1().retain());
|
|
|
|
} else {
|
|
|
|
batch.put(cfh, entry.getT1().retain(), valueToWrite.retain());
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
batch.writeToDbAndClose();
|
|
|
|
batch.close();
|
|
|
|
} else {
|
|
|
|
int i = 0;
|
|
|
|
for (Tuple2<ByteBuf, X> entry : entriesWindow) {
|
|
|
|
var valueToWrite = updatedValuesToWrite.get(i);
|
|
|
|
db.put(cfh, EMPTY_WRITE_OPTIONS, entry.getT1().nioBuffer(), valueToWrite.nioBuffer());
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return valueChangedResult;
|
|
|
|
} finally {
|
|
|
|
if (updateMode == UpdateMode.ALLOW) {
|
|
|
|
int index = 0;
|
|
|
|
for (var lock : locks) {
|
|
|
|
lock.unlockWrite(stamps.get(index));
|
|
|
|
index++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.subscribeOn(dbScheduler)
|
2021-07-31 19:18:20 +02:00
|
|
|
.flatMapIterable(list -> list);
|
2021-07-17 11:52:08 +02:00
|
|
|
},
|
|
|
|
entriesWindow -> {
|
|
|
|
for (Tuple2<ByteBuf, X> entry : entriesWindow) {
|
|
|
|
entry.getT1().release();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
), 2 // Max concurrency is 2 to update data while preparing the next segment
|
|
|
|
)
|
|
|
|
.doOnDiscard(Tuple2.class, entry -> {
|
|
|
|
if (entry.getT1() instanceof ByteBuf bb) {
|
|
|
|
bb.release();
|
|
|
|
}
|
|
|
|
if (entry.getT2() instanceof ByteBuf bb) {
|
|
|
|
bb.release();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.doOnDiscard(ExtraKeyOperationResult.class, entry -> {
|
|
|
|
if (entry.key() instanceof ByteBuf bb) {
|
|
|
|
bb.release();
|
|
|
|
}
|
|
|
|
if (entry.extra() instanceof ByteBuf bb) {
|
|
|
|
bb.release();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.doOnDiscard(Collection.class, obj -> {
|
|
|
|
//noinspection unchecked
|
|
|
|
var castedEntries = (Collection<ExtraKeyOperationResult<Object, Object>>) obj;
|
|
|
|
for (var entry : castedEntries) {
|
|
|
|
if (entry.key() instanceof ByteBuf bb) {
|
|
|
|
bb.release();
|
|
|
|
}
|
|
|
|
if (entry.extra() instanceof ByteBuf bb) {
|
|
|
|
bb.release();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2020-12-07 22:15:18 +01:00
|
|
|
@Override
|
2021-08-28 22:42:51 +02:00
|
|
|
public Flux<LLEntry> getRange(@Nullable LLSnapshot snapshot,
|
2021-08-16 10:27:47 +02:00
|
|
|
Mono<LLRange> rangeMono,
|
2021-03-18 16:19:41 +01:00
|
|
|
boolean existsAlmostCertainly) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Flux.usingWhen(rangeMono,
|
|
|
|
range -> {
|
|
|
|
if (range.isSingle()) {
|
|
|
|
return getRangeSingle(snapshot, Mono.just(range.getMin()).map(ByteBuf::retain), existsAlmostCertainly);
|
|
|
|
} else {
|
|
|
|
return getRangeMulti(snapshot, Mono.just(range).map(LLRange::retain));
|
|
|
|
}
|
|
|
|
},
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-02-02 00:09:46 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-08-28 22:42:51 +02:00
|
|
|
public Flux<List<LLEntry>> getRangeGrouped(@Nullable LLSnapshot snapshot,
|
2021-08-16 10:27:47 +02:00
|
|
|
Mono<LLRange> rangeMono,
|
2021-03-18 16:19:41 +01:00
|
|
|
int prefixLength, boolean existsAlmostCertainly) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Flux.usingWhen(rangeMono,
|
|
|
|
range -> {
|
|
|
|
if (range.isSingle()) {
|
|
|
|
var rangeSingleMono = Mono.just(range.getMin()).map(ByteBuf::retain);
|
|
|
|
return getRangeSingle(snapshot, rangeSingleMono, existsAlmostCertainly).map(List::of);
|
|
|
|
} else {
|
|
|
|
return getRangeMultiGrouped(snapshot, Mono.just(range).map(LLRange::retain), prefixLength);
|
|
|
|
}
|
|
|
|
},
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-01-30 00:24:55 +01:00
|
|
|
}
|
|
|
|
|
2021-08-28 22:42:51 +02:00
|
|
|
private Flux<LLEntry> getRangeSingle(LLSnapshot snapshot,
|
2021-08-16 10:27:47 +02:00
|
|
|
Mono<ByteBuf> keyMono,
|
|
|
|
boolean existsAlmostCertainly) {
|
|
|
|
return Flux.usingWhen(keyMono,
|
|
|
|
key -> this
|
|
|
|
.get(snapshot, Mono.just(key).map(ByteBuf::retain), existsAlmostCertainly)
|
2021-08-28 22:42:51 +02:00
|
|
|
.map(value -> new LLEntry(key.retain(), value)),
|
2021-08-16 10:27:47 +02:00
|
|
|
key -> Mono.fromRunnable(key::release)
|
2021-08-28 22:42:51 +02:00
|
|
|
).transform(LLUtils::handleDiscard);
|
2021-01-30 20:16:14 +01:00
|
|
|
}
|
|
|
|
|
2021-08-28 22:42:51 +02:00
|
|
|
private Flux<LLEntry> getRangeMulti(LLSnapshot snapshot, Mono<LLRange> rangeMono) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Flux.usingWhen(rangeMono,
|
2021-08-27 02:49:51 +02:00
|
|
|
range -> Flux.using(
|
|
|
|
() -> new LLLocalEntryReactiveRocksIterator(db, alloc, cfh, range.retain(),
|
|
|
|
databaseOptions.allowNettyDirect(), resolveSnapshot(snapshot), getRangeMultiDebugName),
|
|
|
|
llLocalEntryReactiveRocksIterator -> llLocalEntryReactiveRocksIterator.flux().subscribeOn(dbScheduler),
|
|
|
|
LLLocalReactiveRocksIterator::release
|
|
|
|
).transform(LLUtils::handleDiscard),
|
2021-08-16 10:27:47 +02:00
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-02-02 00:09:46 +01:00
|
|
|
}
|
|
|
|
|
2021-08-28 22:42:51 +02:00
|
|
|
private Flux<List<LLEntry>> getRangeMultiGrouped(LLSnapshot snapshot, Mono<LLRange> rangeMono, int prefixLength) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Flux.usingWhen(rangeMono,
|
2021-08-27 02:49:51 +02:00
|
|
|
range -> Flux.using(
|
|
|
|
() -> new LLLocalGroupedEntryReactiveRocksIterator(db, alloc, cfh, prefixLength, range.retain(),
|
|
|
|
databaseOptions.allowNettyDirect(), resolveSnapshot(snapshot), "getRangeMultiGrouped"),
|
|
|
|
reactiveRocksIterator -> reactiveRocksIterator.flux().subscribeOn(dbScheduler),
|
|
|
|
LLLocalGroupedReactiveRocksIterator::release
|
|
|
|
).transform(LLUtils::handleDiscard),
|
2021-08-16 10:27:47 +02:00
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-01-30 00:24:55 +01:00
|
|
|
}
|
|
|
|
|
2021-01-30 20:16:14 +01:00
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Flux<ByteBuf> getRangeKeys(@Nullable LLSnapshot snapshot, Mono<LLRange> rangeMono) {
|
|
|
|
return Flux.usingWhen(rangeMono,
|
|
|
|
range -> {
|
|
|
|
if (range.isSingle()) {
|
|
|
|
return this.getRangeKeysSingle(snapshot, Mono.just(range.getMin()).map(ByteBuf::retain));
|
|
|
|
} else {
|
|
|
|
return this.getRangeKeysMulti(snapshot, Mono.just(range).map(LLRange::retain));
|
|
|
|
}
|
|
|
|
},
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-02-02 00:09:46 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Flux<List<ByteBuf>> getRangeKeysGrouped(@Nullable LLSnapshot snapshot,
|
|
|
|
Mono<LLRange> rangeMono,
|
|
|
|
int prefixLength) {
|
|
|
|
return Flux.usingWhen(rangeMono,
|
2021-08-27 02:49:51 +02:00
|
|
|
range -> Flux.using(
|
|
|
|
() -> new LLLocalGroupedKeyReactiveRocksIterator(db, alloc, cfh, prefixLength, range.retain(),
|
|
|
|
databaseOptions.allowNettyDirect(), resolveSnapshot(snapshot), "getRangeKeysGrouped"),
|
|
|
|
reactiveRocksIterator -> reactiveRocksIterator.flux().subscribeOn(dbScheduler),
|
|
|
|
LLLocalGroupedReactiveRocksIterator::release
|
|
|
|
).transform(LLUtils::handleDiscard),
|
2021-08-16 10:27:47 +02:00
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-03-14 13:24:46 +01:00
|
|
|
}
|
|
|
|
|
2021-06-27 11:58:12 +02:00
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Flux<BadBlock> badBlocks(Mono<LLRange> rangeMono) {
|
|
|
|
return Flux.usingWhen(rangeMono,
|
|
|
|
range -> Flux
|
|
|
|
.<BadBlock>create(sink -> {
|
|
|
|
try (var ro = new ReadOptions(getReadOptions(null))) {
|
|
|
|
ro.setFillCache(false);
|
|
|
|
if (!range.isSingle()) {
|
|
|
|
ro.setReadaheadSize(32 * 1024);
|
|
|
|
}
|
|
|
|
ro.setVerifyChecksums(true);
|
|
|
|
var rocksIteratorTuple = getRocksIterator(databaseOptions.allowNettyDirect(), ro, range.retain(), db, cfh);
|
|
|
|
try {
|
|
|
|
try (var rocksIterator = rocksIteratorTuple.getT1()) {
|
|
|
|
rocksIterator.seekToFirst();
|
2021-06-27 11:58:12 +02:00
|
|
|
rocksIterator.status();
|
2021-08-16 10:27:47 +02:00
|
|
|
while (rocksIterator.isValid() && !sink.isCancelled()) {
|
|
|
|
try {
|
|
|
|
rocksIterator.status();
|
|
|
|
rocksIterator.key(DUMMY_WRITE_ONLY_BYTE_BUFFER);
|
|
|
|
rocksIterator.status();
|
|
|
|
rocksIterator.value(DUMMY_WRITE_ONLY_BYTE_BUFFER);
|
|
|
|
rocksIterator.status();
|
|
|
|
} catch (RocksDBException ex) {
|
|
|
|
sink.next(new BadBlock(databaseName, Column.special(columnName), null, ex));
|
|
|
|
}
|
|
|
|
rocksIterator.next();
|
|
|
|
}
|
2021-06-27 11:58:12 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
rocksIteratorTuple.getT2().release();
|
|
|
|
rocksIteratorTuple.getT3().release();
|
2021-06-27 11:58:12 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
sink.complete();
|
|
|
|
} catch (Throwable ex) {
|
|
|
|
sink.error(ex);
|
2021-06-27 11:58:12 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
})
|
|
|
|
.subscribeOn(dbScheduler),
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-06-27 11:58:12 +02:00
|
|
|
}
|
|
|
|
|
2021-03-14 13:24:46 +01:00
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Flux<ByteBuf> getRangeKeyPrefixes(@Nullable LLSnapshot snapshot, Mono<LLRange> rangeMono, int prefixLength) {
|
|
|
|
return Flux.usingWhen(rangeMono,
|
|
|
|
range -> Flux
|
|
|
|
.using(
|
|
|
|
() -> new LLLocalKeyPrefixReactiveRocksIterator(db,
|
|
|
|
alloc,
|
|
|
|
cfh,
|
|
|
|
prefixLength,
|
|
|
|
range.retain(),
|
|
|
|
databaseOptions.allowNettyDirect(),
|
|
|
|
resolveSnapshot(snapshot),
|
|
|
|
true,
|
|
|
|
"getRangeKeysGrouped"
|
|
|
|
),
|
|
|
|
LLLocalKeyPrefixReactiveRocksIterator::flux,
|
|
|
|
LLLocalKeyPrefixReactiveRocksIterator::release
|
|
|
|
)
|
|
|
|
.subscribeOn(dbScheduler),
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-01-30 20:16:14 +01:00
|
|
|
}
|
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
private Flux<ByteBuf> getRangeKeysSingle(LLSnapshot snapshot, Mono<ByteBuf> keyMono) {
|
|
|
|
return Flux.usingWhen(keyMono,
|
|
|
|
key -> this
|
|
|
|
.containsKey(snapshot, Mono.just(key).map(ByteBuf::retain))
|
|
|
|
.flux()
|
|
|
|
.<ByteBuf>handle((contains, sink) -> {
|
|
|
|
if (contains) {
|
|
|
|
sink.next(key.retain());
|
|
|
|
} else {
|
|
|
|
sink.complete();
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.doOnDiscard(ByteBuf.class, ReferenceCounted::release),
|
|
|
|
key -> Mono.fromRunnable(key::release)
|
|
|
|
);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
private Flux<ByteBuf> getRangeKeysMulti(LLSnapshot snapshot, Mono<LLRange> rangeMono) {
|
|
|
|
return Flux.usingWhen(rangeMono,
|
2021-08-27 02:49:51 +02:00
|
|
|
range -> Flux.using(
|
|
|
|
() -> new LLLocalKeyReactiveRocksIterator(db, alloc, cfh, range.retain(),
|
|
|
|
databaseOptions.allowNettyDirect(), resolveSnapshot(snapshot), getRangeKeysMultiDebugName),
|
|
|
|
llLocalKeyReactiveRocksIterator -> llLocalKeyReactiveRocksIterator.flux().subscribeOn(dbScheduler),
|
|
|
|
LLLocalReactiveRocksIterator::release
|
|
|
|
).transform(LLUtils::handleDiscard),
|
2021-08-16 10:27:47 +02:00
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-01-30 20:16:14 +01:00
|
|
|
}
|
|
|
|
|
2020-12-07 22:15:18 +01:00
|
|
|
@Override
|
2021-08-28 22:42:51 +02:00
|
|
|
public Mono<Void> setRange(Mono<LLRange> rangeMono, Flux<LLEntry> entries) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Mono.usingWhen(rangeMono,
|
|
|
|
range -> {
|
|
|
|
if (USE_WINDOW_IN_SET_RANGE) {
|
|
|
|
return Mono
|
|
|
|
.<Void>fromCallable(() -> {
|
|
|
|
if (!USE_WRITE_BATCH_IN_SET_RANGE_DELETE || !USE_WRITE_BATCHES_IN_SET_RANGE) {
|
2021-08-22 18:20:05 +02:00
|
|
|
assert EMPTY_READ_OPTIONS.isOwningHandle();
|
2021-08-16 10:27:47 +02:00
|
|
|
try (var opts = new ReadOptions(EMPTY_READ_OPTIONS)) {
|
|
|
|
ReleasableSlice minBound;
|
|
|
|
if (range.hasMin()) {
|
|
|
|
minBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
opts,
|
|
|
|
IterateBound.LOWER,
|
|
|
|
range.getMin().retain()
|
|
|
|
);
|
2021-06-19 21:55:20 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
minBound = emptyReleasableSlice();
|
2021-06-19 21:55:20 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
try {
|
|
|
|
ReleasableSlice maxBound;
|
|
|
|
if (range.hasMax()) {
|
|
|
|
maxBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
opts,
|
|
|
|
IterateBound.UPPER,
|
|
|
|
range.getMax().retain()
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
maxBound = emptyReleasableSlice();
|
|
|
|
}
|
2021-08-22 18:20:05 +02:00
|
|
|
assert cfh.isOwningHandle();
|
|
|
|
assert opts.isOwningHandle();
|
2021-08-16 10:27:47 +02:00
|
|
|
try (RocksIterator it = db.newIterator(cfh, opts)) {
|
|
|
|
if (!PREFER_SEEK_TO_FIRST && range.hasMin()) {
|
|
|
|
rocksIterSeekTo(databaseOptions.allowNettyDirect(), it, range.getMin().retain());
|
|
|
|
} else {
|
|
|
|
it.seekToFirst();
|
|
|
|
}
|
|
|
|
it.status();
|
|
|
|
while (it.isValid()) {
|
|
|
|
db.delete(cfh, it.key());
|
|
|
|
it.next();
|
|
|
|
it.status();
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
maxBound.release();
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
minBound.release();
|
2021-06-19 21:55:20 +02:00
|
|
|
}
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} else if (USE_CAPPED_WRITE_BATCH_IN_SET_RANGE) {
|
|
|
|
try (var batch = new CappedWriteBatch(db,
|
|
|
|
CAPPED_WRITE_BATCH_CAP,
|
|
|
|
RESERVED_WRITE_BATCH_SIZE,
|
|
|
|
MAX_WRITE_BATCH_SIZE,
|
|
|
|
BATCH_WRITE_OPTIONS
|
|
|
|
)) {
|
|
|
|
if (range.isSingle()) {
|
|
|
|
batch.delete(cfh, range.getSingle().retain());
|
|
|
|
} else {
|
|
|
|
deleteSmallRangeWriteBatch(batch, range.retain());
|
|
|
|
}
|
|
|
|
batch.writeToDbAndClose();
|
|
|
|
}
|
2021-05-12 01:25:59 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
try (var batch = new WriteBatch(RESERVED_WRITE_BATCH_SIZE)) {
|
|
|
|
if (range.isSingle()) {
|
|
|
|
batch.delete(cfh, LLUtils.toArray(range.getSingle()));
|
|
|
|
} else {
|
|
|
|
deleteSmallRangeWriteBatch(batch, range.retain());
|
|
|
|
}
|
|
|
|
db.write(EMPTY_WRITE_OPTIONS, batch);
|
|
|
|
batch.clear();
|
|
|
|
}
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
return null;
|
2021-05-12 01:25:59 +02:00
|
|
|
})
|
2021-08-16 10:27:47 +02:00
|
|
|
.subscribeOn(dbScheduler)
|
2021-08-29 01:15:51 +02:00
|
|
|
.thenMany(entries.window(MULTI_GET_WINDOW))
|
2021-08-16 10:27:47 +02:00
|
|
|
.flatMap(keysWindowFlux -> keysWindowFlux
|
|
|
|
.collectList()
|
|
|
|
.flatMap(entriesList -> Mono
|
|
|
|
.<Void>fromCallable(() -> {
|
|
|
|
try {
|
|
|
|
if (!USE_WRITE_BATCHES_IN_SET_RANGE) {
|
2021-08-28 22:42:51 +02:00
|
|
|
for (LLEntry entry : entriesList) {
|
|
|
|
assert !entry.isReleased();
|
|
|
|
assert entry.getKey().refCnt() > 0;
|
|
|
|
assert entry.getValue().refCnt() > 0;
|
2021-08-16 10:27:47 +02:00
|
|
|
db.put(cfh, EMPTY_WRITE_OPTIONS, entry.getKey().nioBuffer(), entry.getValue().nioBuffer());
|
|
|
|
}
|
|
|
|
} else if (USE_CAPPED_WRITE_BATCH_IN_SET_RANGE) {
|
|
|
|
try (var batch = new CappedWriteBatch(db,
|
|
|
|
CAPPED_WRITE_BATCH_CAP,
|
|
|
|
RESERVED_WRITE_BATCH_SIZE,
|
|
|
|
MAX_WRITE_BATCH_SIZE,
|
|
|
|
BATCH_WRITE_OPTIONS
|
|
|
|
)) {
|
2021-08-28 22:42:51 +02:00
|
|
|
for (LLEntry entry : entriesList) {
|
|
|
|
assert !entry.isReleased();
|
|
|
|
assert entry.getKey().refCnt() > 0;
|
|
|
|
assert entry.getValue().refCnt() > 0;
|
2021-08-16 10:27:47 +02:00
|
|
|
batch.put(cfh, entry.getKey().retain(), entry.getValue().retain());
|
|
|
|
}
|
|
|
|
batch.writeToDbAndClose();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
try (var batch = new WriteBatch(RESERVED_WRITE_BATCH_SIZE)) {
|
2021-08-28 22:42:51 +02:00
|
|
|
for (LLEntry entry : entriesList) {
|
|
|
|
assert !entry.isReleased();
|
|
|
|
assert entry.getKey().refCnt() > 0;
|
|
|
|
assert entry.getValue().refCnt() > 0;
|
2021-08-16 10:27:47 +02:00
|
|
|
batch.put(cfh, LLUtils.toArray(entry.getKey()), LLUtils.toArray(entry.getValue()));
|
|
|
|
}
|
|
|
|
db.write(EMPTY_WRITE_OPTIONS, batch);
|
|
|
|
batch.clear();
|
|
|
|
}
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
return null;
|
|
|
|
} finally {
|
2021-08-28 22:42:51 +02:00
|
|
|
for (LLEntry entry : entriesList) {
|
|
|
|
assert !entry.isReleased();
|
|
|
|
entry.release();
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-05-02 19:18:15 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
})
|
|
|
|
.subscribeOn(dbScheduler)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
.then()
|
|
|
|
.onErrorMap(cause -> new IOException("Failed to write range", cause));
|
|
|
|
} else {
|
|
|
|
if (USE_WRITE_BATCHES_IN_SET_RANGE) {
|
|
|
|
return Mono.fromCallable(() -> {
|
|
|
|
throw new UnsupportedOperationException("Can't use write batches in setRange without window. Please fix params");
|
|
|
|
});
|
|
|
|
}
|
|
|
|
return this
|
|
|
|
.getRange(null, Mono.just(range).map(LLRange::retain), false)
|
|
|
|
.flatMap(oldValue -> Mono
|
|
|
|
.<Void>fromCallable(() -> {
|
|
|
|
try {
|
|
|
|
dbDelete(cfh, EMPTY_WRITE_OPTIONS, oldValue.getKey().retain());
|
2021-05-12 01:25:59 +02:00
|
|
|
return null;
|
|
|
|
} finally {
|
2021-08-16 10:27:47 +02:00
|
|
|
oldValue.getKey().release();
|
|
|
|
oldValue.getValue().release();
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-05-12 01:25:59 +02:00
|
|
|
})
|
|
|
|
.subscribeOn(dbScheduler)
|
|
|
|
)
|
2021-08-16 10:27:47 +02:00
|
|
|
.then(entries
|
|
|
|
.flatMap(entry -> Mono.using(
|
|
|
|
() -> entry,
|
|
|
|
releasableEntry -> this
|
|
|
|
.put(Mono.just(entry.getKey()).map(ByteBuf::retain),
|
|
|
|
Mono.just(entry.getValue()).map(ByteBuf::retain),
|
|
|
|
LLDictionaryResultType.VOID
|
|
|
|
)
|
|
|
|
.doOnNext(ReferenceCounted::release),
|
|
|
|
releasableEntry -> {
|
|
|
|
releasableEntry.getKey().release();
|
|
|
|
releasableEntry.getValue().release();
|
|
|
|
})
|
|
|
|
)
|
|
|
|
.then(Mono.<Void>empty())
|
|
|
|
)
|
|
|
|
.onErrorMap(cause -> new IOException("Failed to write range", cause));
|
|
|
|
}
|
|
|
|
},
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-03-14 03:13:19 +01:00
|
|
|
}
|
|
|
|
|
2021-08-16 10:27:47 +02:00
|
|
|
//todo: this is broken, check why. (is this still true?)
|
2021-03-20 12:41:11 +01:00
|
|
|
private void deleteSmallRangeWriteBatch(CappedWriteBatch writeBatch, LLRange range)
|
|
|
|
throws RocksDBException {
|
2021-06-19 21:55:20 +02:00
|
|
|
try (var readOpts = new ReadOptions(getReadOptions(null))) {
|
2021-05-03 02:57:08 +02:00
|
|
|
readOpts.setFillCache(false);
|
|
|
|
ReleasableSlice minBound;
|
|
|
|
if (range.hasMin()) {
|
2021-06-29 23:31:02 +02:00
|
|
|
minBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
readOpts,
|
|
|
|
IterateBound.LOWER,
|
|
|
|
range.getMin().retain()
|
|
|
|
);
|
2021-03-20 12:41:11 +01:00
|
|
|
} else {
|
2021-05-03 02:57:08 +02:00
|
|
|
minBound = emptyReleasableSlice();
|
2021-03-20 12:41:11 +01:00
|
|
|
}
|
2021-05-03 02:57:08 +02:00
|
|
|
try {
|
|
|
|
ReleasableSlice maxBound;
|
|
|
|
if (range.hasMax()) {
|
2021-06-29 23:31:02 +02:00
|
|
|
maxBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
readOpts,
|
|
|
|
IterateBound.UPPER,
|
|
|
|
range.getMax().retain()
|
|
|
|
);
|
2021-05-03 02:57:08 +02:00
|
|
|
} else {
|
|
|
|
maxBound = emptyReleasableSlice();
|
|
|
|
}
|
|
|
|
try (var rocksIterator = db.newIterator(cfh, readOpts)) {
|
|
|
|
if (!LLLocalDictionary.PREFER_SEEK_TO_FIRST && range.hasMin()) {
|
2021-06-29 23:31:02 +02:00
|
|
|
rocksIterSeekTo(databaseOptions.allowNettyDirect(), rocksIterator, range.getMin().retain());
|
2021-05-03 02:57:08 +02:00
|
|
|
} else {
|
|
|
|
rocksIterator.seekToFirst();
|
|
|
|
}
|
2021-06-27 11:58:12 +02:00
|
|
|
rocksIterator.status();
|
2021-05-03 02:57:08 +02:00
|
|
|
while (rocksIterator.isValid()) {
|
2021-05-03 12:44:22 +02:00
|
|
|
writeBatch.delete(cfh, LLUtils.readDirectNioBuffer(alloc, rocksIterator::key));
|
2021-05-03 02:57:08 +02:00
|
|
|
rocksIterator.next();
|
2021-06-27 11:58:12 +02:00
|
|
|
rocksIterator.status();
|
2021-05-03 02:57:08 +02:00
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
maxBound.release();
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
minBound.release();
|
2021-03-20 12:41:11 +01:00
|
|
|
}
|
2021-04-30 19:15:04 +02:00
|
|
|
} finally {
|
|
|
|
range.release();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-02 19:18:15 +02:00
|
|
|
private void deleteSmallRangeWriteBatch(WriteBatch writeBatch, LLRange range)
|
|
|
|
throws RocksDBException {
|
2021-06-19 21:55:20 +02:00
|
|
|
try (var readOpts = new ReadOptions(getReadOptions(null))) {
|
2021-05-03 02:57:08 +02:00
|
|
|
readOpts.setFillCache(false);
|
|
|
|
ReleasableSlice minBound;
|
|
|
|
if (range.hasMin()) {
|
2021-06-29 23:31:02 +02:00
|
|
|
minBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
readOpts,
|
|
|
|
IterateBound.LOWER,
|
|
|
|
range.getMin().retain()
|
|
|
|
);
|
2021-05-02 19:18:15 +02:00
|
|
|
} else {
|
2021-05-03 02:57:08 +02:00
|
|
|
minBound = emptyReleasableSlice();
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-05-03 02:57:08 +02:00
|
|
|
try {
|
|
|
|
ReleasableSlice maxBound;
|
|
|
|
if (range.hasMax()) {
|
2021-06-29 23:31:02 +02:00
|
|
|
maxBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
readOpts,
|
|
|
|
IterateBound.UPPER,
|
|
|
|
range.getMax().retain()
|
|
|
|
);
|
2021-05-03 02:57:08 +02:00
|
|
|
} else {
|
|
|
|
maxBound = emptyReleasableSlice();
|
|
|
|
}
|
|
|
|
try (var rocksIterator = db.newIterator(cfh, readOpts)) {
|
|
|
|
if (!LLLocalDictionary.PREFER_SEEK_TO_FIRST && range.hasMin()) {
|
2021-06-29 23:31:02 +02:00
|
|
|
rocksIterSeekTo(databaseOptions.allowNettyDirect(), rocksIterator, range.getMin().retain());
|
2021-05-03 02:57:08 +02:00
|
|
|
} else {
|
|
|
|
rocksIterator.seekToFirst();
|
|
|
|
}
|
2021-06-27 11:58:12 +02:00
|
|
|
rocksIterator.status();
|
2021-05-03 02:57:08 +02:00
|
|
|
while (rocksIterator.isValid()) {
|
|
|
|
writeBatch.delete(cfh, rocksIterator.key());
|
|
|
|
rocksIterator.next();
|
2021-06-27 11:58:12 +02:00
|
|
|
rocksIterator.status();
|
2021-05-03 02:57:08 +02:00
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
maxBound.release();
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
minBound.release();
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-05-02 19:18:15 +02:00
|
|
|
} finally {
|
|
|
|
range.release();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-29 23:31:02 +02:00
|
|
|
private static void rocksIterSeekTo(boolean allowNettyDirect, RocksIterator rocksIterator, ByteBuf buffer) {
|
2021-05-02 19:18:15 +02:00
|
|
|
try {
|
2021-06-29 23:31:02 +02:00
|
|
|
if (allowNettyDirect && buffer.isDirect()) {
|
2021-05-03 21:41:51 +02:00
|
|
|
ByteBuffer nioBuffer = LLUtils.toDirect(buffer);
|
|
|
|
assert nioBuffer.isDirect();
|
|
|
|
rocksIterator.seek(nioBuffer);
|
|
|
|
} else if (buffer.hasArray() && buffer.array().length == buffer.readableBytes()) {
|
|
|
|
rocksIterator.seek(buffer.array());
|
|
|
|
} else {
|
|
|
|
rocksIterator.seek(LLUtils.toArray(buffer));
|
|
|
|
}
|
2021-04-30 19:15:04 +02:00
|
|
|
} finally {
|
|
|
|
buffer.release();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-29 23:31:02 +02:00
|
|
|
private static ReleasableSlice setIterateBound(boolean allowNettyDirect, ReadOptions readOpts, IterateBound boundType, ByteBuf buffer) {
|
2021-04-30 19:15:04 +02:00
|
|
|
try {
|
2021-05-03 21:41:51 +02:00
|
|
|
Objects.requireNonNull(buffer);
|
2021-04-30 19:15:04 +02:00
|
|
|
AbstractSlice<?> slice;
|
2021-06-29 23:31:02 +02:00
|
|
|
if (allowNettyDirect && LLLocalDictionary.USE_DIRECT_BUFFER_BOUNDS && buffer.isDirect()) {
|
2021-05-03 21:41:51 +02:00
|
|
|
ByteBuffer nioBuffer = LLUtils.toDirect(buffer);
|
2021-05-03 00:29:26 +02:00
|
|
|
assert nioBuffer.isDirect();
|
|
|
|
slice = new DirectSlice(nioBuffer, buffer.readableBytes());
|
|
|
|
assert slice.size() == buffer.readableBytes();
|
|
|
|
assert slice.compare(new Slice(LLUtils.toArray(buffer))) == 0;
|
2021-05-03 21:41:51 +02:00
|
|
|
if (boundType == IterateBound.LOWER) {
|
|
|
|
readOpts.setIterateLowerBound(slice);
|
|
|
|
} else {
|
|
|
|
readOpts.setIterateUpperBound(slice);
|
|
|
|
}
|
2021-05-21 00:19:40 +02:00
|
|
|
return new ReleasableSliceImpl(slice, buffer.retain(), nioBuffer);
|
2021-05-03 00:29:26 +02:00
|
|
|
} else {
|
2021-05-03 21:41:51 +02:00
|
|
|
slice = new Slice(Objects.requireNonNull(LLUtils.toArray(buffer)));
|
|
|
|
if (boundType == IterateBound.LOWER) {
|
|
|
|
readOpts.setIterateLowerBound(slice);
|
|
|
|
} else {
|
|
|
|
readOpts.setIterateUpperBound(slice);
|
|
|
|
}
|
2021-05-21 00:19:40 +02:00
|
|
|
return new ReleasableSliceImpl(slice, null, null);
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
buffer.release();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-02 19:18:15 +02:00
|
|
|
private static ReleasableSlice emptyReleasableSlice() {
|
|
|
|
var arr = new byte[0];
|
2021-05-21 00:19:40 +02:00
|
|
|
|
|
|
|
return new SimpleSliceWithoutRelease(new Slice(arr), null, arr);
|
2021-05-02 19:18:15 +02:00
|
|
|
}
|
2021-04-30 19:15:04 +02:00
|
|
|
|
2021-05-21 00:19:40 +02:00
|
|
|
public static record SimpleSliceWithoutRelease(AbstractSlice<?> slice, @Nullable ByteBuf byteBuf,
|
|
|
|
@Nullable Object additionalData) implements ReleasableSlice {}
|
|
|
|
|
|
|
|
public static record ReleasableSliceImpl(AbstractSlice<?> slice, @Nullable ByteBuf byteBuf,
|
|
|
|
@Nullable Object additionalData) implements ReleasableSlice {
|
2021-04-30 19:15:04 +02:00
|
|
|
|
2021-05-21 00:19:40 +02:00
|
|
|
@Override
|
2021-04-30 19:15:04 +02:00
|
|
|
public void release() {
|
|
|
|
slice.clear();
|
|
|
|
if (byteBuf != null) {
|
|
|
|
byteBuf.release();
|
|
|
|
}
|
2021-03-20 12:41:11 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-30 00:24:55 +01:00
|
|
|
public Mono<Void> clear() {
|
|
|
|
return Mono
|
|
|
|
.<Void>fromCallable(() -> {
|
2021-06-27 16:52:45 +02:00
|
|
|
try (var readOpts = new ReadOptions(getReadOptions(null))) {
|
|
|
|
readOpts.setVerifyChecksums(VERIFY_CHECKSUMS_WHEN_NOT_NEEDED);
|
|
|
|
|
|
|
|
// readOpts.setIgnoreRangeDeletions(true);
|
|
|
|
readOpts.setFillCache(false);
|
|
|
|
readOpts.setReadaheadSize(32 * 1024); // 32KiB
|
|
|
|
try (CappedWriteBatch writeBatch = new CappedWriteBatch(db,
|
|
|
|
CAPPED_WRITE_BATCH_CAP,
|
|
|
|
RESERVED_WRITE_BATCH_SIZE,
|
|
|
|
MAX_WRITE_BATCH_SIZE,
|
|
|
|
BATCH_WRITE_OPTIONS
|
|
|
|
)) {
|
|
|
|
|
|
|
|
byte[] firstDeletedKey = null;
|
|
|
|
byte[] lastDeletedKey = null;
|
|
|
|
try (RocksIterator rocksIterator = db.newIterator(cfh, readOpts)) {
|
|
|
|
rocksIterator.seekToLast();
|
|
|
|
|
|
|
|
rocksIterator.status();
|
|
|
|
if (rocksIterator.isValid()) {
|
|
|
|
firstDeletedKey = FIRST_KEY;
|
|
|
|
lastDeletedKey = rocksIterator.key();
|
|
|
|
writeBatch.deleteRange(cfh, FIRST_KEY, rocksIterator.key());
|
|
|
|
writeBatch.delete(cfh, rocksIterator.key());
|
|
|
|
}
|
2021-03-14 03:13:19 +01:00
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-06-27 16:52:45 +02:00
|
|
|
writeBatch.writeToDbAndClose();
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-03-20 12:41:11 +01:00
|
|
|
|
2021-06-27 16:52:45 +02:00
|
|
|
// Compact range
|
|
|
|
db.suggestCompactRange(cfh);
|
|
|
|
if (firstDeletedKey != null && lastDeletedKey != null) {
|
|
|
|
db.compactRange(cfh,
|
|
|
|
firstDeletedKey,
|
|
|
|
lastDeletedKey,
|
|
|
|
new CompactRangeOptions()
|
|
|
|
.setAllowWriteStall(false)
|
|
|
|
.setExclusiveManualCompaction(false)
|
|
|
|
.setChangeLevel(false)
|
|
|
|
);
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-06-27 16:52:45 +02:00
|
|
|
db.flush(new FlushOptions().setWaitForFlush(true).setAllowWriteStall(true), cfh);
|
|
|
|
db.flushWal(true);
|
|
|
|
}
|
|
|
|
return null;
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
2021-01-30 00:24:55 +01:00
|
|
|
})
|
2021-03-04 22:01:50 +01:00
|
|
|
.onErrorMap(cause -> new IOException("Failed to clear", cause))
|
2021-02-01 02:21:53 +01:00
|
|
|
.subscribeOn(dbScheduler);
|
2020-12-07 22:15:18 +01:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Mono<Long> sizeRange(@Nullable LLSnapshot snapshot, Mono<LLRange> rangeMono, boolean fast) {
|
|
|
|
return Mono.usingWhen(rangeMono,
|
|
|
|
range -> {
|
|
|
|
if (range.isAll()) {
|
|
|
|
return this
|
|
|
|
.runOnDb(() -> fast ? fastSizeAll(snapshot) : exactSizeAll(snapshot))
|
|
|
|
.onErrorMap(IOException::new);
|
|
|
|
} else {
|
|
|
|
return runOnDb(() -> {
|
|
|
|
try (var readOpts = new ReadOptions(resolveSnapshot(snapshot))) {
|
|
|
|
readOpts.setFillCache(false);
|
|
|
|
readOpts.setVerifyChecksums(VERIFY_CHECKSUMS_WHEN_NOT_NEEDED);
|
|
|
|
ReleasableSlice minBound;
|
|
|
|
if (range.hasMin()) {
|
|
|
|
minBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
readOpts,
|
|
|
|
IterateBound.LOWER,
|
|
|
|
range.getMin().retain()
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
minBound = emptyReleasableSlice();
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
ReleasableSlice maxBound;
|
|
|
|
if (range.hasMax()) {
|
|
|
|
maxBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
readOpts,
|
|
|
|
IterateBound.UPPER,
|
|
|
|
range.getMax().retain()
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
maxBound = emptyReleasableSlice();
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
if (fast) {
|
|
|
|
readOpts.setIgnoreRangeDeletions(true);
|
|
|
|
|
|
|
|
}
|
|
|
|
try (var rocksIterator = db.newIterator(cfh, readOpts)) {
|
|
|
|
if (!LLLocalDictionary.PREFER_SEEK_TO_FIRST && range.hasMin()) {
|
|
|
|
rocksIterSeekTo(databaseOptions.allowNettyDirect(),
|
|
|
|
rocksIterator,
|
2021-06-29 23:31:02 +02:00
|
|
|
range.getMin().retain()
|
|
|
|
);
|
2021-05-12 21:41:47 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
rocksIterator.seekToFirst();
|
2021-05-12 21:41:47 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
long i = 0;
|
|
|
|
rocksIterator.status();
|
|
|
|
while (rocksIterator.isValid()) {
|
|
|
|
rocksIterator.next();
|
|
|
|
rocksIterator.status();
|
|
|
|
i++;
|
2021-05-12 21:41:47 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
return i;
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
maxBound.release();
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
minBound.release();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}).onErrorMap(cause -> new IOException("Failed to get size of range " + range, cause));
|
|
|
|
}
|
|
|
|
},
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-08-28 22:42:51 +02:00
|
|
|
public Mono<LLEntry> getOne(@Nullable LLSnapshot snapshot, Mono<LLRange> rangeMono) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Mono.usingWhen(rangeMono,
|
|
|
|
range -> runOnDb(() -> {
|
|
|
|
try (var readOpts = new ReadOptions(resolveSnapshot(snapshot))) {
|
|
|
|
ReleasableSlice minBound;
|
|
|
|
if (range.hasMin()) {
|
|
|
|
minBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
readOpts,
|
|
|
|
IterateBound.LOWER,
|
|
|
|
range.getMin().retain()
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
minBound = emptyReleasableSlice();
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
ReleasableSlice maxBound;
|
|
|
|
if (range.hasMax()) {
|
|
|
|
maxBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
2021-06-29 23:31:02 +02:00
|
|
|
readOpts,
|
2021-08-16 10:27:47 +02:00
|
|
|
IterateBound.UPPER,
|
|
|
|
range.getMax().retain()
|
2021-06-29 23:31:02 +02:00
|
|
|
);
|
2021-05-03 02:57:08 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
maxBound = emptyReleasableSlice();
|
2021-03-18 19:53:32 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
try (var rocksIterator = db.newIterator(cfh, readOpts)) {
|
|
|
|
if (!LLLocalDictionary.PREFER_SEEK_TO_FIRST && range.hasMin()) {
|
|
|
|
rocksIterSeekTo(databaseOptions.allowNettyDirect(), rocksIterator, range.getMin().retain());
|
2021-05-12 01:25:59 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
rocksIterator.seekToFirst();
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
rocksIterator.status();
|
|
|
|
if (rocksIterator.isValid()) {
|
|
|
|
ByteBuf key = LLUtils.readDirectNioBuffer(alloc, rocksIterator::key);
|
|
|
|
try {
|
|
|
|
ByteBuf value = LLUtils.readDirectNioBuffer(alloc, rocksIterator::value);
|
2021-05-12 01:25:59 +02:00
|
|
|
try {
|
2021-08-28 22:42:51 +02:00
|
|
|
return new LLEntry(key, value);
|
2021-05-12 01:25:59 +02:00
|
|
|
} finally {
|
2021-08-16 10:27:47 +02:00
|
|
|
value.release();
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
key.release();
|
2021-05-03 02:57:08 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} else {
|
|
|
|
return null;
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
2021-05-03 02:57:08 +02:00
|
|
|
} finally {
|
2021-08-16 10:27:47 +02:00
|
|
|
maxBound.release();
|
2021-03-14 13:08:03 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
minBound.release();
|
2021-03-14 13:08:03 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
}),
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-02-02 15:36:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-08-16 10:27:47 +02:00
|
|
|
public Mono<ByteBuf> getOneKey(@Nullable LLSnapshot snapshot, Mono<LLRange> rangeMono) {
|
|
|
|
return Mono.usingWhen(rangeMono,
|
|
|
|
range -> runOnDb(() -> {
|
|
|
|
try (var readOpts = new ReadOptions(resolveSnapshot(snapshot))) {
|
|
|
|
ReleasableSlice minBound;
|
|
|
|
if (range.hasMin()) {
|
|
|
|
minBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
readOpts,
|
|
|
|
IterateBound.LOWER,
|
|
|
|
range.getMin().retain()
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
minBound = emptyReleasableSlice();
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
ReleasableSlice maxBound;
|
|
|
|
if (range.hasMax()) {
|
|
|
|
maxBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
2021-06-29 23:31:02 +02:00
|
|
|
readOpts,
|
2021-08-16 10:27:47 +02:00
|
|
|
IterateBound.UPPER,
|
|
|
|
range.getMax().retain()
|
2021-06-29 23:31:02 +02:00
|
|
|
);
|
2021-05-03 02:57:08 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
maxBound = emptyReleasableSlice();
|
2021-05-03 02:57:08 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
try (var rocksIterator = db.newIterator(cfh, readOpts)) {
|
|
|
|
if (!LLLocalDictionary.PREFER_SEEK_TO_FIRST && range.hasMin()) {
|
|
|
|
rocksIterSeekTo(databaseOptions.allowNettyDirect(), rocksIterator, range.getMin().retain());
|
2021-05-12 01:25:59 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
rocksIterator.seekToFirst();
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
ByteBuf key;
|
|
|
|
rocksIterator.status();
|
|
|
|
if (rocksIterator.isValid()) {
|
|
|
|
key = LLUtils.readDirectNioBuffer(alloc, rocksIterator::key);
|
|
|
|
return key;
|
|
|
|
} else {
|
|
|
|
return null;
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
|
|
|
} finally {
|
2021-08-16 10:27:47 +02:00
|
|
|
maxBound.release();
|
2021-05-03 02:57:08 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
minBound.release();
|
2021-02-02 15:36:11 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
}),
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2021-02-02 15:36:11 +01:00
|
|
|
}
|
|
|
|
|
2021-06-27 11:58:12 +02:00
|
|
|
private long fastSizeAll(@Nullable LLSnapshot snapshot) throws RocksDBException {
|
2021-06-19 21:55:20 +02:00
|
|
|
try (var rocksdbSnapshot = new ReadOptions(resolveSnapshot(snapshot))) {
|
|
|
|
if (USE_CURRENT_FASTSIZE_FOR_OLD_SNAPSHOTS || rocksdbSnapshot.snapshot() == null) {
|
|
|
|
try {
|
|
|
|
return db.getLongProperty(cfh, "rocksdb.estimate-num-keys");
|
|
|
|
} catch (RocksDBException e) {
|
|
|
|
e.printStackTrace();
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
} else if (PARALLEL_EXACT_SIZE) {
|
|
|
|
return exactSizeAll(snapshot);
|
|
|
|
} else {
|
|
|
|
rocksdbSnapshot.setFillCache(false);
|
|
|
|
rocksdbSnapshot.setVerifyChecksums(VERIFY_CHECKSUMS_WHEN_NOT_NEEDED);
|
|
|
|
rocksdbSnapshot.setIgnoreRangeDeletions(true);
|
|
|
|
long count = 0;
|
2021-06-27 11:58:12 +02:00
|
|
|
try (RocksIterator rocksIterator = db.newIterator(cfh, rocksdbSnapshot)) {
|
|
|
|
rocksIterator.seekToFirst();
|
|
|
|
rocksIterator.status();
|
2021-06-19 21:55:20 +02:00
|
|
|
// If it's a fast size of a snapshot, count only up to 100'000 elements
|
2021-06-27 11:58:12 +02:00
|
|
|
while (rocksIterator.isValid() && count < 100_000) {
|
2021-06-19 21:55:20 +02:00
|
|
|
count++;
|
2021-06-27 11:58:12 +02:00
|
|
|
rocksIterator.next();
|
|
|
|
rocksIterator.status();
|
2021-06-19 21:55:20 +02:00
|
|
|
}
|
|
|
|
return count;
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-30 00:24:55 +01:00
|
|
|
private long exactSizeAll(@Nullable LLSnapshot snapshot) {
|
2021-06-19 21:55:20 +02:00
|
|
|
try (var readOpts = new ReadOptions(resolveSnapshot(snapshot))) {
|
|
|
|
readOpts.setFillCache(false);
|
|
|
|
readOpts.setReadaheadSize(32 * 1024); // 32KiB
|
|
|
|
readOpts.setVerifyChecksums(VERIFY_CHECKSUMS_WHEN_NOT_NEEDED);
|
|
|
|
|
|
|
|
if (PARALLEL_EXACT_SIZE) {
|
|
|
|
var commonPool = ForkJoinPool.commonPool();
|
|
|
|
var futures = IntStream
|
|
|
|
.range(-1, LLUtils.LEXICONOGRAPHIC_ITERATION_SEEKS.length)
|
|
|
|
.mapToObj(idx -> Pair.of(idx == -1 ? new byte[0] : LLUtils.LEXICONOGRAPHIC_ITERATION_SEEKS[idx],
|
|
|
|
idx + 1 >= LLUtils.LEXICONOGRAPHIC_ITERATION_SEEKS.length ? null
|
|
|
|
: LLUtils.LEXICONOGRAPHIC_ITERATION_SEEKS[idx + 1]
|
|
|
|
))
|
|
|
|
.map(range -> (Callable<Long>) () -> {
|
|
|
|
long partialCount = 0;
|
|
|
|
try (var rangeReadOpts = new ReadOptions(readOpts)) {
|
|
|
|
Slice sliceBegin;
|
|
|
|
if (range.getKey() != null) {
|
|
|
|
sliceBegin = new Slice(range.getKey());
|
|
|
|
} else {
|
|
|
|
sliceBegin = null;
|
|
|
|
}
|
|
|
|
Slice sliceEnd;
|
|
|
|
if (range.getValue() != null) {
|
|
|
|
sliceEnd = new Slice(range.getValue());
|
|
|
|
} else {
|
|
|
|
sliceEnd = null;
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
if (sliceBegin != null) {
|
|
|
|
rangeReadOpts.setIterateLowerBound(sliceBegin);
|
|
|
|
}
|
|
|
|
if (sliceBegin != null) {
|
|
|
|
rangeReadOpts.setIterateUpperBound(sliceEnd);
|
|
|
|
}
|
2021-06-27 11:58:12 +02:00
|
|
|
try (RocksIterator rocksIterator = db.newIterator(cfh, rangeReadOpts)) {
|
|
|
|
rocksIterator.seekToFirst();
|
|
|
|
rocksIterator.status();
|
|
|
|
while (rocksIterator.isValid()) {
|
2021-06-19 21:55:20 +02:00
|
|
|
partialCount++;
|
2021-06-27 11:58:12 +02:00
|
|
|
rocksIterator.next();
|
|
|
|
rocksIterator.status();
|
2021-06-19 21:55:20 +02:00
|
|
|
}
|
|
|
|
return partialCount;
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
if (sliceBegin != null) {
|
|
|
|
sliceBegin.close();
|
|
|
|
}
|
|
|
|
if (sliceEnd != null) {
|
|
|
|
sliceEnd.close();
|
|
|
|
}
|
2021-03-18 19:53:32 +01:00
|
|
|
}
|
|
|
|
}
|
2021-06-19 21:55:20 +02:00
|
|
|
})
|
|
|
|
.map(commonPool::submit)
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
long count = 0;
|
|
|
|
for (ForkJoinTask<Long> future : futures) {
|
|
|
|
count += future.join();
|
2021-03-18 19:53:32 +01:00
|
|
|
}
|
|
|
|
return count;
|
2021-06-19 21:55:20 +02:00
|
|
|
} else {
|
|
|
|
long count = 0;
|
|
|
|
try (RocksIterator iter = db.newIterator(cfh, readOpts)) {
|
|
|
|
iter.seekToFirst();
|
|
|
|
while (iter.isValid()) {
|
|
|
|
count++;
|
|
|
|
iter.next();
|
|
|
|
}
|
|
|
|
return count;
|
|
|
|
}
|
2021-03-18 19:53:32 +01:00
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-08-28 22:42:51 +02:00
|
|
|
public Mono<LLEntry> removeOne(Mono<LLRange> rangeMono) {
|
2021-08-16 10:27:47 +02:00
|
|
|
return Mono.usingWhen(rangeMono,
|
|
|
|
range -> runOnDb(() -> {
|
|
|
|
try (var readOpts = new ReadOptions(getReadOptions(null))) {
|
|
|
|
ReleasableSlice minBound;
|
|
|
|
if (range.hasMin()) {
|
|
|
|
minBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
|
|
|
readOpts,
|
|
|
|
IterateBound.LOWER,
|
|
|
|
range.getMin().retain()
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
minBound = emptyReleasableSlice();
|
|
|
|
}
|
|
|
|
try {
|
|
|
|
ReleasableSlice maxBound;
|
|
|
|
if (range.hasMax()) {
|
|
|
|
maxBound = setIterateBound(databaseOptions.allowNettyDirect(),
|
2021-06-29 23:31:02 +02:00
|
|
|
readOpts,
|
2021-08-16 10:27:47 +02:00
|
|
|
IterateBound.UPPER,
|
|
|
|
range.getMax().retain()
|
2021-06-29 23:31:02 +02:00
|
|
|
);
|
2021-05-03 02:57:08 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
maxBound = emptyReleasableSlice();
|
2021-05-03 02:57:08 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
try (RocksIterator rocksIterator = db.newIterator(cfh, readOpts)) {
|
|
|
|
if (!LLLocalDictionary.PREFER_SEEK_TO_FIRST && range.hasMin()) {
|
|
|
|
rocksIterSeekTo(databaseOptions.allowNettyDirect(), rocksIterator, range.getMin().retain());
|
2021-05-12 01:25:59 +02:00
|
|
|
} else {
|
2021-08-16 10:27:47 +02:00
|
|
|
rocksIterator.seekToFirst();
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
rocksIterator.status();
|
|
|
|
if (!rocksIterator.isValid()) {
|
|
|
|
return null;
|
2021-05-12 01:25:59 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
ByteBuf key = LLUtils.readDirectNioBuffer(alloc, rocksIterator::key);
|
|
|
|
ByteBuf value = LLUtils.readDirectNioBuffer(alloc, rocksIterator::value);
|
|
|
|
dbDelete(cfh, null, key);
|
2021-08-28 22:42:51 +02:00
|
|
|
return new LLEntry(key, value);
|
2021-05-12 01:25:59 +02:00
|
|
|
} finally {
|
2021-08-16 10:27:47 +02:00
|
|
|
maxBound.release();
|
2021-05-03 02:57:08 +02:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
} finally {
|
|
|
|
minBound.release();
|
2021-01-30 00:24:55 +01:00
|
|
|
}
|
2021-08-16 10:27:47 +02:00
|
|
|
}
|
|
|
|
}).onErrorMap(cause -> new IOException("Failed to delete " + range.toString(), cause)),
|
|
|
|
range -> Mono.fromRunnable(range::release)
|
|
|
|
);
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
2021-04-03 19:09:06 +02:00
|
|
|
|
|
|
|
@NotNull
|
2021-06-29 23:31:02 +02:00
|
|
|
public static Tuple3<RocksIterator, ReleasableSlice, ReleasableSlice> getRocksIterator(boolean allowNettyDirect,
|
|
|
|
ReadOptions readOptions,
|
2021-04-03 19:09:06 +02:00
|
|
|
LLRange range,
|
|
|
|
RocksDB db,
|
|
|
|
ColumnFamilyHandle cfh) {
|
2021-04-30 19:15:04 +02:00
|
|
|
try {
|
|
|
|
ReleasableSlice sliceMin;
|
|
|
|
ReleasableSlice sliceMax;
|
|
|
|
if (range.hasMin()) {
|
2021-06-29 23:31:02 +02:00
|
|
|
sliceMin = setIterateBound(allowNettyDirect, readOptions, IterateBound.LOWER, range.getMin().retain());
|
2021-04-30 19:15:04 +02:00
|
|
|
} else {
|
2021-05-02 19:18:15 +02:00
|
|
|
sliceMin = emptyReleasableSlice();
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
|
|
|
if (range.hasMax()) {
|
2021-06-29 23:31:02 +02:00
|
|
|
sliceMax = setIterateBound(allowNettyDirect, readOptions, IterateBound.UPPER, range.getMax().retain());
|
2021-04-30 19:15:04 +02:00
|
|
|
} else {
|
2021-05-02 19:18:15 +02:00
|
|
|
sliceMax = emptyReleasableSlice();
|
2021-04-30 19:15:04 +02:00
|
|
|
}
|
|
|
|
var rocksIterator = db.newIterator(cfh, readOptions);
|
|
|
|
if (!PREFER_SEEK_TO_FIRST && range.hasMin()) {
|
2021-06-29 23:31:02 +02:00
|
|
|
rocksIterSeekTo(allowNettyDirect, rocksIterator, range.getMin().retain());
|
2021-04-30 19:15:04 +02:00
|
|
|
} else {
|
|
|
|
rocksIterator.seekToFirst();
|
|
|
|
}
|
|
|
|
return Tuples.of(rocksIterator, sliceMin, sliceMax);
|
|
|
|
} finally {
|
|
|
|
range.release();
|
2021-04-03 19:09:06 +02:00
|
|
|
}
|
|
|
|
}
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|