c9042db619
Summary: Range Locking supports Lock Escalation. Lock Escalation is invoked when lock memory is nearly exhausted, and it reduces the amount of memory used by joining adjacent locks. Bridging the gap between certain locks has adverse effects. For example, in MyRocks it is not a good idea to bridge the gap between locks in different indexes, as that causes the lock to cover large portions of indexes, or even entire indexes. Resolve this by introducing an Escalation Barrier. The escalation process will call the user-provided barrier callback function: bool(const Endpoint& a, const Endpoint& b). If the function returns true, there's a barrier between a and b and Lock Escalation will not try to bridge the gap between a and b. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9290 Reviewed By: akankshamahajan15 Differential Revision: D33486753 Pulled By: riversand963 fbshipit-source-id: f97910b67aba0579ea1d35f523ca6863d3dd018e
423 lines
12 KiB
C++
423 lines
12 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#ifndef ROCKSDB_LITE
|
|
#ifndef OS_WIN
|
|
|
|
#include <algorithm>
|
|
#include <functional>
|
|
#include <string>
|
|
#include <thread>
|
|
|
|
#include "db/db_impl/db_impl.h"
|
|
#include "port/port.h"
|
|
#include "rocksdb/db.h"
|
|
#include "rocksdb/options.h"
|
|
#include "rocksdb/perf_context.h"
|
|
#include "rocksdb/utilities/transaction.h"
|
|
#include "rocksdb/utilities/transaction_db.h"
|
|
#include "utilities/transactions/lock/point/point_lock_manager_test.h"
|
|
#include "utilities/transactions/pessimistic_transaction_db.h"
|
|
#include "utilities/transactions/transaction_test.h"
|
|
|
|
using std::string;
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
class RangeLockingTest : public ::testing::Test {
|
|
public:
|
|
TransactionDB* db;
|
|
std::string dbname;
|
|
Options options;
|
|
|
|
std::shared_ptr<RangeLockManagerHandle> range_lock_mgr;
|
|
TransactionDBOptions txn_db_options;
|
|
|
|
RangeLockingTest() : db(nullptr) {
|
|
options.create_if_missing = true;
|
|
dbname = test::PerThreadDBPath("range_locking_testdb");
|
|
|
|
DestroyDB(dbname, options);
|
|
|
|
range_lock_mgr.reset(NewRangeLockManager(nullptr));
|
|
txn_db_options.lock_mgr_handle = range_lock_mgr;
|
|
|
|
auto s = TransactionDB::Open(options, txn_db_options, dbname, &db);
|
|
assert(s.ok());
|
|
}
|
|
|
|
~RangeLockingTest() {
|
|
delete db;
|
|
db = nullptr;
|
|
// This is to skip the assert statement in FaultInjectionTestEnv. There
|
|
// seems to be a bug in btrfs that the makes readdir return recently
|
|
// unlink-ed files. By using the default fs we simply ignore errors resulted
|
|
// from attempting to delete such files in DestroyDB.
|
|
DestroyDB(dbname, options);
|
|
}
|
|
|
|
PessimisticTransaction* NewTxn(
|
|
TransactionOptions txn_opt = TransactionOptions()) {
|
|
Transaction* txn = db->BeginTransaction(WriteOptions(), txn_opt);
|
|
return reinterpret_cast<PessimisticTransaction*>(txn);
|
|
}
|
|
};
|
|
|
|
// TODO: set a smaller lock wait timeout so that the test runs faster.
|
|
TEST_F(RangeLockingTest, BasicRangeLocking) {
|
|
WriteOptions write_options;
|
|
TransactionOptions txn_options;
|
|
std::string value;
|
|
ReadOptions read_options;
|
|
auto cf = db->DefaultColumnFamily();
|
|
|
|
Transaction* txn0 = db->BeginTransaction(write_options, txn_options);
|
|
Transaction* txn1 = db->BeginTransaction(write_options, txn_options);
|
|
|
|
// Get a range lock
|
|
ASSERT_OK(txn0->GetRangeLock(cf, Endpoint("a"), Endpoint("c")));
|
|
|
|
// Check that range Lock inhibits an overlapping range lock
|
|
{
|
|
auto s = txn1->GetRangeLock(cf, Endpoint("b"), Endpoint("z"));
|
|
ASSERT_TRUE(s.IsTimedOut());
|
|
}
|
|
|
|
// Check that range Lock inhibits an overlapping point lock
|
|
{
|
|
auto s = txn1->GetForUpdate(read_options, cf, Slice("b"), &value);
|
|
ASSERT_TRUE(s.IsTimedOut());
|
|
}
|
|
|
|
// Get a point lock, check that it inhibits range locks
|
|
ASSERT_OK(txn0->Put(cf, Slice("n"), Slice("value")));
|
|
{
|
|
auto s = txn1->GetRangeLock(cf, Endpoint("m"), Endpoint("p"));
|
|
ASSERT_TRUE(s.IsTimedOut());
|
|
}
|
|
|
|
ASSERT_OK(txn0->Commit());
|
|
txn1->Rollback();
|
|
|
|
delete txn0;
|
|
delete txn1;
|
|
}
|
|
|
|
// Mimics the MyRocks update pattern: take a range lock first, then write a key
// inside that range with assume_tracked=true. The write must not go back to
// the lock manager, which is detected through a sync point callback.
TEST_F(RangeLockingTest, MyRocksLikeUpdate) {
  WriteOptions write_options;
  TransactionOptions txn_options;
  Transaction* txn0 = db->BeginTransaction(write_options, txn_options);
  auto cf = db->DefaultColumnFamily();
  Status s;

  // Get a range lock for the range we are about to update
  ASSERT_OK(txn0->GetRangeLock(cf, Endpoint("a"), Endpoint("c")));

  bool try_range_lock_called = false;

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "RangeTreeLockManager::TryRangeLock:enter",
      [&](void* /*arg*/) { try_range_lock_called = true; });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  // For performance reasons, the following must NOT call lock_mgr->TryLock():
  // We verify that by checking the value of try_range_lock_called.
  s = txn0->Put(cf, Slice("b"), Slice("value"),
                /*assume_tracked=*/true);

  // Unregister the callback before any assertion can return from the test:
  // the callback captures a local variable by reference and must not stay
  // registered after this function exits.
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();

  ASSERT_OK(s);
  ASSERT_FALSE(try_range_lock_called);

  ASSERT_OK(txn0->Rollback());

  delete txn0;
}
|
|
|
|
// Two transactions take a shared lock on the same key; one of them then asks
// for an exclusive range lock covering that key. The request must time out
// because the other transaction still holds its shared lock.
TEST_F(RangeLockingTest, UpgradeLockAndGetConflict) {
  WriteOptions write_options;
  TransactionOptions txn_options;
  auto cf = db->DefaultColumnFamily();
  Status s;
  std::string value;
  // Keep the expected lock wait short.
  txn_options.lock_timeout = 10;

  Transaction* txn0 = db->BeginTransaction(write_options, txn_options);
  Transaction* txn1 = db->BeginTransaction(write_options, txn_options);

  // Get the shared lock in txn0
  s = txn0->GetForUpdate(ReadOptions(), cf, Slice("a"), &value,
                         false /*exclusive*/);
  // The key was never written, so the read part reports NotFound.
  ASSERT_TRUE(s.IsNotFound());

  // Get the shared lock on the same key in txn1
  s = txn1->GetForUpdate(ReadOptions(), cf, Slice("a"), &value,
                         false /*exclusive*/);
  ASSERT_TRUE(s.IsNotFound());

  // Now, try getting an exclusive lock that overlaps with the above
  s = txn0->GetRangeLock(cf, Endpoint("a"), Endpoint("b"));
  ASSERT_TRUE(s.IsTimedOut());

  // Do not silently drop the Status returned by Rollback().
  ASSERT_OK(txn0->Rollback());
  ASSERT_OK(txn1->Rollback());

  delete txn0;
  delete txn1;
}
|
|
|
|
|
|
// Checks snapshot validation: txn2 takes its snapshot before txn1 commits a
// new value for the key, so txn2's later write to that key must be rejected
// with Busy.
TEST_F(RangeLockingTest, SnapshotValidation) {
  Status s;
  Slice key_slice = Slice("k");
  ColumnFamilyHandle* cfh = db->DefaultColumnFamily();

  // Seed the key. The statuses are checked so that a broken setup fails here
  // rather than in the value comparisons below.
  auto txn0 = NewTxn();
  ASSERT_OK(txn0->Put(key_slice, Slice("initial")));
  ASSERT_OK(txn0->Commit());

  // txn1
  auto txn1 = NewTxn();
  txn1->SetSnapshot();
  std::string val1;
  ASSERT_OK(txn1->Get(ReadOptions(), cfh, key_slice, &val1));
  ASSERT_EQ(val1, "initial");
  val1 = val1 + std::string("-txn1");

  ASSERT_OK(txn1->Put(cfh, key_slice, Slice(val1)));

  // txn2
  auto txn2 = NewTxn();
  txn2->SetSnapshot();
  std::string val2;
  // This will see the original value as nothing is committed
  // This is also Get, so it doesn't acquire any locks.
  ASSERT_OK(txn2->Get(ReadOptions(), cfh, key_slice, &val2));
  ASSERT_EQ(val2, "initial");

  // txn1
  ASSERT_OK(txn1->Commit());

  // txn2
  val2 = val2 + std::string("-txn2");
  // Now, this call should do Snapshot Validation and fail:
  s = txn2->Put(cfh, key_slice, Slice(val2));
  ASSERT_TRUE(s.IsBusy());

  // The rejected Put left nothing to write; the commit itself must succeed.
  ASSERT_OK(txn2->Commit());

  delete txn0;
  delete txn1;
  delete txn2;
}
|
|
|
|
// Two transactions each take one range lock; the lock status data reported by
// the lock manager must then hold exactly those two locks, each attributed to
// the transaction that took it.
TEST_F(RangeLockingTest, MultipleTrxLockStatusData) {
  WriteOptions write_options;
  TransactionOptions txn_options;
  auto cf = db->DefaultColumnFamily();

  Transaction* txn0 = db->BeginTransaction(write_options, txn_options);
  Transaction* txn1 = db->BeginTransaction(write_options, txn_options);

  // txn0 locks the single key "z", txn1 locks the range ["b", "e"].
  ASSERT_OK(txn0->GetRangeLock(cf, Endpoint("z"), Endpoint("z")));
  ASSERT_OK(txn1->GetRangeLock(cf, Endpoint("b"), Endpoint("e")));

  auto lock_data = range_lock_mgr->GetRangeLockStatusData();
  ASSERT_EQ(lock_data.size(), 2);
  for (const auto& entry : lock_data) {
    // Both locks were taken in the default column family.
    ASSERT_EQ(entry.first, cf->GetID());
    const auto& info = entry.second;
    ASSERT_FALSE(info.start.inf_suffix);
    ASSERT_FALSE(info.end.inf_suffix);
    ASSERT_TRUE(info.exclusive);
    ASSERT_EQ(info.ids.size(), 1);

    // Match the reported endpoints against the owning transaction.
    const auto owner_id = info.ids[0];
    if (owner_id == txn0->GetID()) {
      ASSERT_EQ(info.start.slice, "z");
      ASSERT_EQ(info.end.slice, "z");
    } else if (owner_id == txn1->GetID()) {
      ASSERT_EQ(info.start.slice, "b");
      ASSERT_EQ(info.end.slice, "e");
    } else {
      FAIL();  // Unknown transaction ID.
    }
  }

  delete txn0;
  delete txn1;
}
|
|
|
|
#if defined(__has_feature)
|
|
#if __has_feature(thread_sanitizer)
|
|
#define SKIP_LOCK_ESCALATION_TEST 1
|
|
#endif
|
|
#else
|
|
#define SKIP_LOCK_ESCALATION_TEST 1
|
|
#endif
|
|
|
|
#ifndef SKIP_LOCK_ESCALATION_TEST
|
|
// Caps the lock memory, then takes many single-key locks in one transaction.
// The test expects at least one lock escalation and lock memory usage that
// stays within the configured cap.
TEST_F(RangeLockingTest, BasicLockEscalation) {
  auto cf = db->DefaultColumnFamily();

  // Before any lock is taken: no lock memory in use, no escalations.
  auto status = range_lock_mgr->GetStatus();
  ASSERT_EQ(status.current_lock_memory, 0);
  ASSERT_EQ(status.escalation_count, 0);

  ASSERT_EQ(0, range_lock_mgr->SetMaxLockMemory(2000));

  auto txn = NewTxn();

  // Lock 2020 distinct keys, each an 8-character zero-padded number.
  for (int key_no = 0; key_no < 2020; ++key_no) {
    std::ostringstream key_stream;
    key_stream << std::setfill('0') << std::setw(8) << key_no;
    const std::string key = key_stream.str();
    ASSERT_OK(txn->GetRangeLock(cf, Endpoint(key), Endpoint(key)));
  }

  status = range_lock_mgr->GetStatus();
  ASSERT_GT(status.escalation_count, 0);
  ASSERT_LE(status.current_lock_memory, 2000);

  delete txn;
}
|
|
|
|
// An escalation barrier function. Allow escalation iff the first two bytes are
|
|
// identical.
|
|
static bool escalation_barrier(const Endpoint& a, const Endpoint& b) {
|
|
assert(a.slice.size() > 2);
|
|
assert(b.slice.size() > 2);
|
|
if (memcmp(a.slice.data(), b.slice.data(), 2)) {
|
|
return true; // This is a barrier
|
|
} else {
|
|
return false; // No barrier
|
|
}
|
|
}
|
|
|
|
// Installs escalation_barrier (a barrier wherever the first two key bytes
// differ), forces lock escalation in one transaction, and then checks from a
// second transaction that keys lying just before each barrier can still be
// locked, i.e. the escalated locks did not grow across a barrier.
TEST_F(RangeLockingTest, LockEscalationBarrier) {
  auto cf = db->DefaultColumnFamily();

  auto counters = range_lock_mgr->GetStatus();

  // Initially no lock escalations have happened
  ASSERT_EQ(counters.escalation_count, 0);

  // Check the return value, as BasicLockEscalation does.
  ASSERT_EQ(0, range_lock_mgr->SetMaxLockMemory(8000));
  range_lock_mgr->SetEscalationBarrierFunc(escalation_barrier);

  // Insert enough locks to cause lock escalations to happen
  auto txn = NewTxn();
  const int N = 2000;
  for (int i = 0; i < N; i++) {
    std::ostringstream buf;
    buf << std::setw(4) << std::setfill('0') << i;
    std::string buf_str = buf.str();
    ASSERT_OK(txn->GetRangeLock(cf, Endpoint(buf_str), Endpoint(buf_str)));
  }
  counters = range_lock_mgr->GetStatus();
  ASSERT_GT(counters.escalation_count, 0);

  // Check that lock escalation was not performed across escalation barriers:
  // Use another txn to acquire locks near the barriers.
  auto txn2 = NewTxn();
  // Raise the limit so that the locks taken below are not subject to the
  // small limit set above.
  ASSERT_EQ(0, range_lock_mgr->SetMaxLockMemory(500000));
  for (int i = 100; i < N; i += 100) {
    // The key "<i-1>-a" sorts after "<i-1>" and before "<i>", whose first two
    // bytes differ, so it lies in a gap that must not have been bridged.
    std::ostringstream buf;
    buf << std::setw(4) << std::setfill('0') << (i - 1) << "-a";
    std::string buf_str = buf.str();
    // Check that we CAN get a lock near the escalation barrier
    ASSERT_OK(txn2->GetRangeLock(cf, Endpoint(buf_str), Endpoint(buf_str)));
  }

  // Do not silently drop the Status returned by Rollback().
  ASSERT_OK(txn->Rollback());
  ASSERT_OK(txn2->Rollback());
  delete txn;
  delete txn2;
}
|
|
|
|
#endif
|
|
|
|
// Checks that one timed-out attempt to take a conflicting range lock
// increments the lock manager's lock_wait_count counter by exactly one.
TEST_F(RangeLockingTest, LockWaitCount) {
  TransactionOptions txn_options;
  auto cf = db->DefaultColumnFamily();
  // Keep the expected lock wait short.
  txn_options.lock_timeout = 50;
  Transaction* txn0 = db->BeginTransaction(WriteOptions(), txn_options);
  Transaction* txn1 = db->BeginTransaction(WriteOptions(), txn_options);

  // Get a range lock
  ASSERT_OK(txn0->GetRangeLock(cf, Endpoint("a"), Endpoint("c")));

  uint64_t lock_waits1 = range_lock_mgr->GetStatus().lock_wait_count;
  // Attempt to get a conflicting lock
  auto s = txn1->GetRangeLock(cf, Endpoint("b"), Endpoint("z"));
  ASSERT_TRUE(s.IsTimedOut());

  // Check that the counter was incremented
  uint64_t lock_waits2 = range_lock_mgr->GetStatus().lock_wait_count;
  ASSERT_EQ(lock_waits1 + 1, lock_waits2);

  // Do not silently drop the Status returned by Rollback().
  ASSERT_OK(txn0->Rollback());
  ASSERT_OK(txn1->Rollback());

  delete txn0;
  delete txn1;
}
|
|
|
|
// Setup routine passed as the parameter of the AnyLockManagerTest
// instantiation in this file. It prepares `self` the same way step by step:
// default Env, a per-thread DB directory, a range lock manager stored in
// self->locker_, a TransactionDB opened with that lock manager, and the name
// of the sync point that marks a transaction waiting for a range lock.
void PointLockManagerTestExternalSetup(PointLockManagerTest* self) {
  self->env_ = Env::Default();
  self->db_dir_ = test::PerThreadDBPath("point_lock_manager_test");
  ASSERT_OK(self->env_->CreateDir(self->db_dir_));

  Options db_options;
  db_options.create_if_missing = true;
  TransactionDBOptions txn_db_options;
  txn_db_options.transaction_lock_timeout = 0;

  // Create the range lock manager and keep it in self->locker_.
  auto mutex_factory = std::make_shared<TransactionDBMutexFactoryImpl>();
  self->locker_.reset(NewRangeLockManager(mutex_factory)->getLockManager());

  // Hand the same object to the DB through its handle interface.
  txn_db_options.lock_mgr_handle =
      std::dynamic_pointer_cast<RangeLockManagerHandle>(self->locker_);

  ASSERT_OK(TransactionDB::Open(db_options, txn_db_options, self->db_dir_,
                                &self->db_));
  self->wait_sync_point_name_ = "RangeTreeLockManager::TryRangeLock:WaitingTxn";
}
|
|
|
|
// Instantiates the value-parameterized AnyLockManagerTest tests under the
// name RangeLockManager, with PointLockManagerTestExternalSetup as the only
// parameter value.
INSTANTIATE_TEST_CASE_P(RangeLockManager, AnyLockManagerTest,
|

::testing::Values(PointLockManagerTestExternalSetup));
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|
|
int main(int argc, char** argv) {
|
|
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|
|
|
|
#else // OS_WIN
|
|
|
|
#include <stdio.h>
|
|
// Entry point for OS_WIN builds: no tests are compiled in, so print a notice
// to stderr and report success.
int main(int /*argc*/, char** /*argv*/) {
  fputs("skipped as Range Locking is not supported on Windows\n", stderr);
  return 0;
}
|
|
|
|
#endif // OS_WIN
|
|
|
|
#else
|
|
#include <stdio.h>
|
|
|
|
// Entry point for ROCKSDB_LITE builds: no tests are compiled in, so print a
// notice to stderr and report success.
int main(int /*argc*/, char** /*argv*/) {
  fputs("skipped as transactions are not supported in rocksdb_lite\n", stderr);
  return 0;
}
|
|
|
|
#endif // ROCKSDB_LITE
|