Add more unit test to write_prepared txns
Summary: Closes https://github.com/facebook/rocksdb/pull/2798 Differential Revision: D5724173 Pulled By: maysamyabandeh fbshipit-source-id: fb6b782d933fb4be315b1a231a6a67a66fdc9c96
This commit is contained in:
parent
06b37eef7b
commit
26ac24f199
@ -21,6 +21,7 @@ env:
|
||||
- TEST_GROUP=platform_dependent # 16-18 minutes
|
||||
- TEST_GROUP=1 # 33-35 minutes
|
||||
- TEST_GROUP=2 # 30-32 minutes
|
||||
- TEST_GROUP=3 # ? minutes - under development
|
||||
# Run java tests
|
||||
- JOB_NAME=java_test # 4-11 minutes
|
||||
# Build ROCKSDB_LITE
|
||||
@ -36,6 +37,8 @@ matrix:
|
||||
env: TEST_GROUP=1
|
||||
- os: osx
|
||||
env: TEST_GROUP=2
|
||||
- os: osx
|
||||
env: TEST_GROUP=3
|
||||
- os : osx
|
||||
env: JOB_NAME=cmake-mingw
|
||||
- os : linux
|
||||
@ -59,7 +62,8 @@ script:
|
||||
- ${CXX} --version
|
||||
- if [ "${TEST_GROUP}" == 'platform_dependent' ]; then ccache -C && OPT=-DTRAVIS V=1 ROCKSDBTESTS_END=db_block_cache_test make -j4 all_but_some_tests check_some; fi
|
||||
- if [ "${TEST_GROUP}" == '1' ]; then OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=db_block_cache_test ROCKSDBTESTS_END=comparator_db_test make -j4 check_some; fi
|
||||
- if [ "${TEST_GROUP}" == '2' ]; then OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=comparator_db_test make -j4 check_some; fi
|
||||
- if [ "${TEST_GROUP}" == '2' ]; then OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=comparator_db_test ROCKSDBTESTS_END=write_prepared_transaction_test make -j4 check_some; fi
|
||||
- if [ "${TEST_GROUP}" == '3' ]; then OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=write_prepared_transaction_test make -j4 check_some; fi
|
||||
- if [ "${JOB_NAME}" == 'java_test' ]; then OPT=-DTRAVIS V=1 make clean jclean && make rocksdbjava jtest; fi
|
||||
- if [ "${JOB_NAME}" == 'lite_build' ]; then OPT="-DTRAVIS -DROCKSDB_LITE" V=1 make -j4 static_lib; fi
|
||||
- if [ "${JOB_NAME}" == 'examples' ]; then OPT=-DTRAVIS V=1 make -j4 static_lib; cd examples; make -j4; fi
|
||||
|
@ -830,6 +830,7 @@ if(WITH_TESTS)
|
||||
utilities/table_properties_collectors/compact_on_deletion_collector_test.cc
|
||||
utilities/transactions/optimistic_transaction_test.cc
|
||||
utilities/transactions/transaction_test.cc
|
||||
utilities/transactions/write_prepared_transaction_test.cc
|
||||
utilities/ttl/ttl_test.cc
|
||||
utilities/write_batch_with_index/write_batch_with_index_test.cc
|
||||
)
|
||||
|
7
Makefile
7
Makefile
@ -476,6 +476,7 @@ TESTS = \
|
||||
object_registry_test \
|
||||
repair_test \
|
||||
env_timed_test \
|
||||
write_prepared_transaction_test \
|
||||
|
||||
PARALLEL_TEST = \
|
||||
backupable_db_test \
|
||||
@ -491,7 +492,8 @@ PARALLEL_TEST = \
|
||||
manual_compaction_test \
|
||||
persistent_cache_test \
|
||||
table_test \
|
||||
transaction_test
|
||||
transaction_test \
|
||||
write_prepared_transaction_test
|
||||
|
||||
SUBSET := $(TESTS)
|
||||
ifdef ROCKSDBTESTS_START
|
||||
@ -1392,6 +1394,9 @@ heap_test: util/heap_test.o $(GTEST)
|
||||
transaction_test: utilities/transactions/transaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(AM_LINK)
|
||||
|
||||
write_prepared_transaction_test: utilities/transactions/write_prepared_transaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(AM_LINK)
|
||||
|
||||
sst_dump: tools/sst_dump.o $(LIBOBJECTS)
|
||||
$(AM_LINK)
|
||||
|
||||
|
10
TARGETS
10
TARGETS
@ -15,7 +15,6 @@ rocksdb_compiler_flags = [
|
||||
"-DROCKSDB_SCHED_GETCPU_PRESENT",
|
||||
"-DROCKSDB_SUPPORT_THREAD_LOCAL",
|
||||
"-DOS_LINUX",
|
||||
"-DROCKSDB_UBSAN_RUN",
|
||||
# Flags to enable libs we include
|
||||
"-DSNAPPY",
|
||||
"-DZLIB",
|
||||
@ -476,7 +475,9 @@ ROCKS_TESTS = [['arena_test', 'util/arena_test.cc', 'serial'],
|
||||
['thread_list_test', 'util/thread_list_test.cc', 'serial'],
|
||||
['thread_local_test', 'util/thread_local_test.cc', 'serial'],
|
||||
['timer_queue_test', 'util/timer_queue_test.cc', 'serial'],
|
||||
['transaction_test', 'utilities/transactions/transaction_test.cc', 'serial'],
|
||||
['transaction_test',
|
||||
'utilities/transactions/transaction_test.cc',
|
||||
'parallel'],
|
||||
['ttl_test', 'utilities/ttl/ttl_test.cc', 'serial'],
|
||||
['util_merge_operators_test',
|
||||
'utilities/util_merge_operators_test.cc',
|
||||
@ -493,7 +494,10 @@ ROCKS_TESTS = [['arena_test', 'util/arena_test.cc', 'serial'],
|
||||
'memtable/write_buffer_manager_test.cc',
|
||||
'serial'],
|
||||
['write_callback_test', 'db/write_callback_test.cc', 'serial'],
|
||||
['write_controller_test', 'db/write_controller_test.cc', 'serial']]
|
||||
['write_controller_test', 'db/write_controller_test.cc', 'serial'],
|
||||
['write_prepared_transaction_test',
|
||||
'utilities/transactions/write_prepared_transaction_test.cc',
|
||||
'serial']]
|
||||
|
||||
|
||||
# Generate a test rule for each entry in ROCKS_TESTS
|
||||
|
1
src.mk
1
src.mk
@ -354,6 +354,7 @@ MAIN_SOURCES = \
|
||||
utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \
|
||||
utilities/transactions/optimistic_transaction_test.cc \
|
||||
utilities/transactions/transaction_test.cc \
|
||||
utilities/transactions/write_prepared_transaction_test.cc \
|
||||
utilities/ttl/ttl_test.cc \
|
||||
utilities/write_batch_with_index/write_batch_with_index_test.cc \
|
||||
|
||||
|
@ -46,6 +46,7 @@ extern void TestKillRandom(std::string kill_point, int odds,
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define TEST_SYNC_POINT(x)
|
||||
#define TEST_IDX_SYNC_POINT(x, index)
|
||||
#define TEST_SYNC_POINT_CALLBACK(x, y)
|
||||
#else
|
||||
|
||||
@ -135,6 +136,8 @@ class SyncPoint {
|
||||
// See TransactionLogIteratorRace in db_test.cc for an example use case.
|
||||
// TEST_SYNC_POINT is no op in release build.
|
||||
#define TEST_SYNC_POINT(x) rocksdb::SyncPoint::GetInstance()->Process(x)
|
||||
#define TEST_IDX_SYNC_POINT(x, index) \
|
||||
rocksdb::SyncPoint::GetInstance()->Process(x + std::to_string(index))
|
||||
#define TEST_SYNC_POINT_CALLBACK(x, y) \
|
||||
rocksdb::SyncPoint::GetInstance()->Process(x, y)
|
||||
#endif // NDEBUG
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "rocksdb/utilities/transaction_db.h"
|
||||
#include "util/cast_util.h"
|
||||
#include "util/mutexlock.h"
|
||||
#include "util/sync_point.h"
|
||||
#include "utilities/transactions/pessimistic_transaction.h"
|
||||
#include "utilities/transactions/transaction_db_mutex_impl.h"
|
||||
|
||||
@ -605,121 +606,11 @@ void WritePreparedTxnDB::AddCommitted(uint64_t prepare_seq,
|
||||
if (prev_max < evicted.commit_seq) {
|
||||
// TODO(myabandeh) inc max in larger steps to avoid frequent updates
|
||||
auto max_evicted_seq = evicted.commit_seq;
|
||||
// When max_evicted_seq_ advances, move older entries from prepared_txns_
|
||||
// to delayed_prepared_. This guarantees that if a seq is lower than max,
|
||||
// then it is not in prepared_txns_ ans save an expensive, synchronized
|
||||
// lookup from a shared set. delayed_prepared_ is expected to be empty in
|
||||
// normal cases.
|
||||
{
|
||||
WriteLock wl(&prepared_mutex_);
|
||||
while (!prepared_txns_.empty() &&
|
||||
prepared_txns_.top() <= max_evicted_seq) {
|
||||
auto to_be_popped = prepared_txns_.top();
|
||||
delayed_prepared_.insert(to_be_popped);
|
||||
prepared_txns_.pop();
|
||||
delayed_prepared_empty_.store(false, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
||||
// With each change to max_evicted_seq_ fetch the live snapshots behind it
|
||||
SequenceNumber curr_seq;
|
||||
std::vector<SequenceNumber> all_snapshots;
|
||||
bool update_snapshots = false;
|
||||
{
|
||||
InstrumentedMutex(db_impl_->mutex());
|
||||
// We use this to identify how fresh are the snapshot list. Since this
|
||||
// is done atomically with obtaining the snapshot list, the one with
|
||||
// the larger seq is more fresh. If the seq is equal the full snapshot
|
||||
// list could be different since taking snapshots does not increase
|
||||
// the db seq. However since we only care about snapshots before the
|
||||
// new max, such recent snapshots would not be included the in the
|
||||
// list anyway.
|
||||
curr_seq = db_impl_->GetLatestSequenceNumber();
|
||||
if (curr_seq > snapshots_version_) {
|
||||
// This is to avoid updating the snapshots_ if it already updated
|
||||
// with a more recent vesion by a concrrent thread
|
||||
update_snapshots = true;
|
||||
// We only care about snapshots lower then max
|
||||
all_snapshots =
|
||||
db_impl_->snapshots().GetAll(nullptr, max_evicted_seq);
|
||||
}
|
||||
}
|
||||
if (update_snapshots) {
|
||||
WriteLock wl(&snapshots_mutex_);
|
||||
snapshots_version_ = curr_seq;
|
||||
// We update the list concurrently with the readers.
|
||||
// Both new and old lists are sorted and the new list is subset of the
|
||||
// previous list plus some new items. Thus if a snapshot repeats in
|
||||
// both new and old lists, it will appear upper in the new list. So if
|
||||
// we simply insert the new snapshots in order, if an overwritten item
|
||||
// is still valid in the new list is either written to the same place in
|
||||
// the array or it is written in a higher palce before it gets
|
||||
// overwritten by another item. This guarantess a reader that reads the
|
||||
// list bottom-up will eventaully see a snapshot that repeats in the
|
||||
// update, either before it gets overwritten by the writer or
|
||||
// afterwards.
|
||||
size_t i = 0;
|
||||
auto it = all_snapshots.begin();
|
||||
for (; it != all_snapshots.end() && i < SNAPSHOT_CACHE_SIZE;
|
||||
it++, i++) {
|
||||
snapshot_cache_[i].store(*it, std::memory_order_release);
|
||||
}
|
||||
snapshots_.clear();
|
||||
for (; it != all_snapshots.end(); it++) {
|
||||
// Insert them to a vector that is less efficient to access
|
||||
// concurrently
|
||||
snapshots_.push_back(*it);
|
||||
}
|
||||
// Update the size at the end. Otherwise a parallel reader might read
|
||||
// items that are not set yet.
|
||||
snapshots_total_.store(all_snapshots.size(), std::memory_order_release);
|
||||
}
|
||||
while (prev_max < max_evicted_seq &&
|
||||
!max_evicted_seq_.compare_exchange_weak(
|
||||
prev_max, max_evicted_seq, std::memory_order_release,
|
||||
std::memory_order_acquire)) {
|
||||
};
|
||||
AdvanceMaxEvictedSeq(prev_max, max_evicted_seq);
|
||||
}
|
||||
// After each eviction from commit cache, check if the commit entry should
|
||||
// be kept around because it overlaps with a live snapshot.
|
||||
// First check the snapshot cache that is efficient for concurrent access
|
||||
auto cnt = snapshots_total_.load(std::memory_order_acquire);
|
||||
// The list might get updated concurrently as we are reading from it. The
|
||||
// reader should be able to read all the snapshots that are still valid
|
||||
// after the update. Since the survived snapshots are written in a higher
|
||||
// place before gets overwritten the reader that reads bottom-up will
|
||||
// eventully see it.
|
||||
const bool next_is_larger = true;
|
||||
SequenceNumber snapshot_seq = kMaxSequenceNumber;
|
||||
size_t ip1 = std::min(cnt, SNAPSHOT_CACHE_SIZE);
|
||||
for (; 0 < ip1; ip1--) {
|
||||
snapshot_seq = snapshot_cache_[ip1 - 1].load(std::memory_order_acquire);
|
||||
if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq,
|
||||
snapshot_seq, !next_is_larger)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (UNLIKELY(SNAPSHOT_CACHE_SIZE < cnt && ip1 == SNAPSHOT_CACHE_SIZE &&
|
||||
snapshot_seq < evicted.prep_seq)) {
|
||||
// Then access the less efficient list of snapshots_
|
||||
ReadLock rl(&snapshots_mutex_);
|
||||
// Items could have moved from the snapshots_ to snapshot_cache_ before
|
||||
// accquiring the lock. To make sure that we do not miss a valid snapshot,
|
||||
// read snapshot_cache_ again while holding the lock.
|
||||
for (size_t i = 0; i < SNAPSHOT_CACHE_SIZE; i++) {
|
||||
snapshot_seq = snapshot_cache_[i].load(std::memory_order_acquire);
|
||||
if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq,
|
||||
snapshot_seq, next_is_larger)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (auto snapshot_seq_2 : snapshots_) {
|
||||
if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq,
|
||||
snapshot_seq_2, next_is_larger)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
CheckAgainstSnapshots(evicted);
|
||||
}
|
||||
bool succ =
|
||||
ExchangeCommitEntry(indexed_seq, evicted, {prepare_seq, commit_seq});
|
||||
@ -744,7 +635,7 @@ void WritePreparedTxnDB::AddCommitted(uint64_t prepare_seq,
|
||||
}
|
||||
}
|
||||
|
||||
bool WritePreparedTxnDB::GetCommitEntry(uint64_t indexed_seq,
|
||||
bool WritePreparedTxnDB::GetCommitEntry(const uint64_t indexed_seq,
|
||||
CommitEntry* entry) {
|
||||
// TODO(myabandeh): implement lock-free commit_cache_
|
||||
ReadLock rl(&commit_cache_mutex_);
|
||||
@ -752,8 +643,8 @@ bool WritePreparedTxnDB::GetCommitEntry(uint64_t indexed_seq,
|
||||
return (entry->commit_seq != 0); // initialized
|
||||
}
|
||||
|
||||
bool WritePreparedTxnDB::AddCommitEntry(uint64_t indexed_seq,
|
||||
CommitEntry& new_entry,
|
||||
bool WritePreparedTxnDB::AddCommitEntry(const uint64_t indexed_seq,
|
||||
const CommitEntry& new_entry,
|
||||
CommitEntry* evicted_entry) {
|
||||
// TODO(myabandeh): implement lock-free commit_cache_
|
||||
WriteLock wl(&commit_cache_mutex_);
|
||||
@ -762,9 +653,9 @@ bool WritePreparedTxnDB::AddCommitEntry(uint64_t indexed_seq,
|
||||
return (evicted_entry->commit_seq != 0); // initialized
|
||||
}
|
||||
|
||||
bool WritePreparedTxnDB::ExchangeCommitEntry(uint64_t indexed_seq,
|
||||
CommitEntry& expected_entry,
|
||||
CommitEntry new_entry) {
|
||||
bool WritePreparedTxnDB::ExchangeCommitEntry(const uint64_t indexed_seq,
|
||||
const CommitEntry& expected_entry,
|
||||
const CommitEntry& new_entry) {
|
||||
// TODO(myabandeh): implement lock-free commit_cache_
|
||||
WriteLock wl(&commit_cache_mutex_);
|
||||
auto& evicted_entry = commit_cache_[indexed_seq];
|
||||
@ -775,11 +666,167 @@ bool WritePreparedTxnDB::ExchangeCommitEntry(uint64_t indexed_seq,
|
||||
return true;
|
||||
}
|
||||
|
||||
void WritePreparedTxnDB::AdvanceMaxEvictedSeq(SequenceNumber& prev_max,
|
||||
SequenceNumber& new_max) {
|
||||
// When max_evicted_seq_ advances, move older entries from prepared_txns_
|
||||
// to delayed_prepared_. This guarantees that if a seq is lower than max,
|
||||
// then it is not in prepared_txns_ ans save an expensive, synchronized
|
||||
// lookup from a shared set. delayed_prepared_ is expected to be empty in
|
||||
// normal cases.
|
||||
{
|
||||
WriteLock wl(&prepared_mutex_);
|
||||
while (!prepared_txns_.empty() && prepared_txns_.top() <= new_max) {
|
||||
auto to_be_popped = prepared_txns_.top();
|
||||
delayed_prepared_.insert(to_be_popped);
|
||||
prepared_txns_.pop();
|
||||
delayed_prepared_empty_.store(false, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
||||
// With each change to max_evicted_seq_ fetch the live snapshots behind it
|
||||
SequenceNumber curr_seq;
|
||||
std::vector<SequenceNumber> snapshots;
|
||||
bool update_snapshots = false;
|
||||
{
|
||||
InstrumentedMutex(db_impl_->mutex());
|
||||
// We use this to identify how fresh are the snapshot list. Since this
|
||||
// is done atomically with obtaining the snapshot list, the one with
|
||||
// the larger seq is more fresh. If the seq is equal the full snapshot
|
||||
// list could be different since taking snapshots does not increase
|
||||
// the db seq. However since we only care about snapshots before the
|
||||
// new max, such recent snapshots would not be included the in the
|
||||
// list anyway.
|
||||
curr_seq = db_impl_->GetLatestSequenceNumber();
|
||||
if (curr_seq > snapshots_version_) {
|
||||
// This is to avoid updating the snapshots_ if it already updated
|
||||
// with a more recent vesion by a concrrent thread
|
||||
update_snapshots = true;
|
||||
// We only care about snapshots lower then max
|
||||
snapshots = db_impl_->snapshots().GetAll(nullptr, new_max);
|
||||
}
|
||||
}
|
||||
if (update_snapshots) {
|
||||
UpdateSnapshots(snapshots, curr_seq);
|
||||
}
|
||||
// TODO(myabandeh): check if it worked with relaxed ordering
|
||||
while (prev_max < new_max && !max_evicted_seq_.compare_exchange_weak(
|
||||
prev_max, new_max, std::memory_order_release,
|
||||
std::memory_order_acquire)) {
|
||||
};
|
||||
}
|
||||
|
||||
// 10m entry, 80MB size
|
||||
size_t WritePreparedTxnDB::DEF_COMMIT_CACHE_SIZE = static_cast<size_t>(1 << 21);
|
||||
size_t WritePreparedTxnDB::DEF_SNAPSHOT_CACHE_SIZE =
|
||||
static_cast<size_t>(1 << 7);
|
||||
|
||||
void WritePreparedTxnDB::UpdateSnapshots(
|
||||
const std::vector<SequenceNumber>& snapshots,
|
||||
const SequenceNumber& version) {
|
||||
TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:p:start");
|
||||
TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:s:start");
|
||||
#ifndef NDEBUG
|
||||
size_t sync_i = 0;
|
||||
#endif
|
||||
WriteLock wl(&snapshots_mutex_);
|
||||
snapshots_version_ = version;
|
||||
// We update the list concurrently with the readers.
|
||||
// Both new and old lists are sorted and the new list is subset of the
|
||||
// previous list plus some new items. Thus if a snapshot repeats in
|
||||
// both new and old lists, it will appear upper in the new list. So if
|
||||
// we simply insert the new snapshots in order, if an overwritten item
|
||||
// is still valid in the new list is either written to the same place in
|
||||
// the array or it is written in a higher palce before it gets
|
||||
// overwritten by another item. This guarantess a reader that reads the
|
||||
// list bottom-up will eventaully see a snapshot that repeats in the
|
||||
// update, either before it gets overwritten by the writer or
|
||||
// afterwards.
|
||||
size_t i = 0;
|
||||
auto it = snapshots.begin();
|
||||
for (; it != snapshots.end() && i < SNAPSHOT_CACHE_SIZE; it++, i++) {
|
||||
snapshot_cache_[i].store(*it, std::memory_order_release);
|
||||
TEST_IDX_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:p:", ++sync_i);
|
||||
TEST_IDX_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:s:", sync_i);
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
// Release the remaining sync points since they are useless given that the
|
||||
// reader would also use lock to access snapshots
|
||||
for (++sync_i; sync_i <= 10; ++sync_i) {
|
||||
TEST_IDX_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:p:", sync_i);
|
||||
TEST_IDX_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:s:", sync_i);
|
||||
}
|
||||
#endif
|
||||
snapshots_.clear();
|
||||
for (; it != snapshots.end(); it++) {
|
||||
// Insert them to a vector that is less efficient to access
|
||||
// concurrently
|
||||
snapshots_.push_back(*it);
|
||||
}
|
||||
// Update the size at the end. Otherwise a parallel reader might read
|
||||
// items that are not set yet.
|
||||
snapshots_total_.store(snapshots.size(), std::memory_order_release);
|
||||
TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:p:end");
|
||||
TEST_SYNC_POINT("WritePreparedTxnDB::UpdateSnapshots:s:end");
|
||||
}
|
||||
|
||||
void WritePreparedTxnDB::CheckAgainstSnapshots(const CommitEntry& evicted) {
|
||||
TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:start");
|
||||
TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:start");
|
||||
#ifndef NDEBUG
|
||||
size_t sync_i = 0;
|
||||
#endif
|
||||
// First check the snapshot cache that is efficient for concurrent access
|
||||
auto cnt = snapshots_total_.load(std::memory_order_acquire);
|
||||
// The list might get updated concurrently as we are reading from it. The
|
||||
// reader should be able to read all the snapshots that are still valid
|
||||
// after the update. Since the survived snapshots are written in a higher
|
||||
// place before gets overwritten the reader that reads bottom-up will
|
||||
// eventully see it.
|
||||
const bool next_is_larger = true;
|
||||
SequenceNumber snapshot_seq = kMaxSequenceNumber;
|
||||
size_t ip1 = std::min(cnt, SNAPSHOT_CACHE_SIZE);
|
||||
for (; 0 < ip1; ip1--) {
|
||||
snapshot_seq = snapshot_cache_[ip1 - 1].load(std::memory_order_acquire);
|
||||
TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:",
|
||||
++sync_i);
|
||||
TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:", sync_i);
|
||||
if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq,
|
||||
snapshot_seq, !next_is_larger)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
// Release the remaining sync points before accquiring the lock
|
||||
for (++sync_i; sync_i <= 10; ++sync_i) {
|
||||
TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:", sync_i);
|
||||
TEST_IDX_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:", sync_i);
|
||||
}
|
||||
#endif
|
||||
TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:p:end");
|
||||
TEST_SYNC_POINT("WritePreparedTxnDB::CheckAgainstSnapshots:s:end");
|
||||
if (UNLIKELY(SNAPSHOT_CACHE_SIZE < cnt && ip1 == SNAPSHOT_CACHE_SIZE &&
|
||||
snapshot_seq < evicted.prep_seq)) {
|
||||
// Then access the less efficient list of snapshots_
|
||||
ReadLock rl(&snapshots_mutex_);
|
||||
// Items could have moved from the snapshots_ to snapshot_cache_ before
|
||||
// accquiring the lock. To make sure that we do not miss a valid snapshot,
|
||||
// read snapshot_cache_ again while holding the lock.
|
||||
for (size_t i = 0; i < SNAPSHOT_CACHE_SIZE; i++) {
|
||||
snapshot_seq = snapshot_cache_[i].load(std::memory_order_acquire);
|
||||
if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq,
|
||||
snapshot_seq, next_is_larger)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (auto snapshot_seq_2 : snapshots_) {
|
||||
if (!MaybeUpdateOldCommitMap(evicted.prep_seq, evicted.commit_seq,
|
||||
snapshot_seq_2, next_is_larger)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool WritePreparedTxnDB::MaybeUpdateOldCommitMap(
|
||||
const uint64_t& prep_seq, const uint64_t& commit_seq,
|
||||
const uint64_t& snapshot_seq, const bool next_is_larger = true) {
|
||||
|
@ -109,6 +109,9 @@ class PessimisticTransactionDB : public TransactionDB {
|
||||
uint64_t commit_seq;
|
||||
CommitEntry() : prep_seq(0), commit_seq(0) {}
|
||||
CommitEntry(uint64_t ps, uint64_t cs) : prep_seq(ps), commit_seq(cs) {}
|
||||
bool operator==(const CommitEntry& rhs) const {
|
||||
return prep_seq == rhs.prep_seq && commit_seq == rhs.commit_seq;
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
@ -196,6 +199,11 @@ class WritePreparedTxnDB : public PessimisticTransactionDB {
|
||||
|
||||
private:
|
||||
friend class WritePreparedTransactionTest_IsInSnapshotTest_Test;
|
||||
friend class WritePreparedTransactionTest_CheckAgainstSnapshotsTest_Test;
|
||||
friend class WritePreparedTransactionTest_CommitMapTest_Test;
|
||||
friend class WritePreparedTransactionTest_SnapshotConcurrentAccessTest_Test;
|
||||
friend class WritePreparedTransactionTest;
|
||||
friend class PreparedHeap_BasicsTest_Test;
|
||||
|
||||
void init(const TransactionDBOptions& /* unused */) {
|
||||
snapshot_cache_ = unique_ptr<std::atomic<SequenceNumber>[]>(
|
||||
@ -223,13 +231,16 @@ class WritePreparedTxnDB : public PessimisticTransactionDB {
|
||||
heap_.pop();
|
||||
erased_heap_.pop();
|
||||
}
|
||||
while (heap_.empty() && !erased_heap_.empty()) {
|
||||
erased_heap_.pop();
|
||||
}
|
||||
}
|
||||
void erase(uint64_t seq) {
|
||||
if (!heap_.empty()) {
|
||||
if (seq < heap_.top()) {
|
||||
// Already popped, ignore it.
|
||||
} else if (heap_.top() == seq) {
|
||||
heap_.pop();
|
||||
pop();
|
||||
} else { // (heap_.top() > seq)
|
||||
// Down the heap, remember to pop it later
|
||||
erased_heap_.push(seq);
|
||||
@ -240,17 +251,45 @@ class WritePreparedTxnDB : public PessimisticTransactionDB {
|
||||
|
||||
// Get the commit entry with index indexed_seq from the commit table. It
|
||||
// returns true if such entry exists.
|
||||
bool GetCommitEntry(uint64_t indexed_seq, CommitEntry* entry);
|
||||
bool GetCommitEntry(const uint64_t indexed_seq, CommitEntry* entry);
|
||||
|
||||
// Rewrite the entry with the index indexed_seq in the commit table with the
|
||||
// commit entry <prep_seq, commit_seq>. If the rewrite results into eviction,
|
||||
// sets the evicted_entry and returns true.
|
||||
bool AddCommitEntry(uint64_t indexed_seq, CommitEntry& new_entry,
|
||||
bool AddCommitEntry(const uint64_t indexed_seq, const CommitEntry& new_entry,
|
||||
CommitEntry* evicted_entry);
|
||||
|
||||
// Rewrite the entry with the index indexed_seq in the commit table with the
|
||||
// commit entry new_entry only if the existing entry matches the
|
||||
// expected_entry. Returns false otherwise.
|
||||
bool ExchangeCommitEntry(uint64_t indexed_seq, CommitEntry& expected_entry,
|
||||
CommitEntry new_entry);
|
||||
bool ExchangeCommitEntry(const uint64_t indexed_seq,
|
||||
const CommitEntry& expected_entry,
|
||||
const CommitEntry& new_entry);
|
||||
|
||||
// Increase max_evicted_seq_ from the previous value prev_max to the new
|
||||
// value. This also involves taking care of prepared txns that are not
|
||||
// committed before new_max, as well as updating the list of live snapshots at
|
||||
// the time of updating the max. Thread-safety: this function can be called
|
||||
// concurrently. The concurrent invocations of this function is equivalent to
|
||||
// a serial invocation in which the last invocation is the one with the
|
||||
// largetst new_max value.
|
||||
void AdvanceMaxEvictedSeq(SequenceNumber& prev_max, SequenceNumber& new_max);
|
||||
|
||||
// Update the list of snapshots corresponding to the soon-to-be-updated
|
||||
// max_eviceted_seq_. Thread-safety: this function can be called concurrently.
|
||||
// The concurrent invocations of this function is equivalent to a serial
|
||||
// invocation in which the last invocation is the one with the largetst
|
||||
// version value.
|
||||
void UpdateSnapshots(const std::vector<SequenceNumber>& snapshots,
|
||||
const SequenceNumber& version);
|
||||
|
||||
// Check an evicted entry against live snapshots to see if it should be kept
|
||||
// around or it can be safely discarded (and hence assume committed for all
|
||||
// snapshots). Thread-safety: this function can be called concurrently. If it
|
||||
// is called concurrently with multiple UpdateSnapshots, the result is the
|
||||
// same as checking the intersection of the snapshot list before updates with
|
||||
// the snapshot list of all the concurrent updates.
|
||||
void CheckAgainstSnapshots(const CommitEntry& evicted);
|
||||
|
||||
// Add a new entry to old_commit_map_ if prep_seq <= snapshot_seq <
|
||||
// commit_seq. Return false if checking the next snapshot(s) is not needed.
|
||||
|
@ -9,7 +9,8 @@
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#endif
|
||||
|
||||
#include <inttypes.h>
|
||||
#include "utilities/transactions/transaction_test.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
@ -38,114 +39,20 @@ using std::string;
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
class TransactionTest : public ::testing::TestWithParam<
|
||||
std::tuple<bool, bool, TxnDBWritePolicy>> {
|
||||
public:
|
||||
TransactionDB* db;
|
||||
FaultInjectionTestEnv* env;
|
||||
string dbname;
|
||||
Options options;
|
||||
|
||||
TransactionDBOptions txn_db_options;
|
||||
|
||||
TransactionTest() {
|
||||
options.create_if_missing = true;
|
||||
options.max_write_buffer_number = 2;
|
||||
options.write_buffer_size = 4 * 1024;
|
||||
options.level0_file_num_compaction_trigger = 2;
|
||||
options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
|
||||
env = new FaultInjectionTestEnv(Env::Default());
|
||||
options.env = env;
|
||||
options.concurrent_prepare = std::get<1>(GetParam());
|
||||
dbname = test::TmpDir() + "/transaction_testdb";
|
||||
|
||||
DestroyDB(dbname, options);
|
||||
txn_db_options.transaction_lock_timeout = 0;
|
||||
txn_db_options.default_lock_timeout = 0;
|
||||
txn_db_options.write_policy = std::get<2>(GetParam());
|
||||
Status s;
|
||||
if (std::get<0>(GetParam()) == false) {
|
||||
s = TransactionDB::Open(options, txn_db_options, dbname, &db);
|
||||
} else {
|
||||
s = OpenWithStackableDB();
|
||||
}
|
||||
assert(s.ok());
|
||||
}
|
||||
|
||||
~TransactionTest() {
|
||||
delete db;
|
||||
DestroyDB(dbname, options);
|
||||
delete env;
|
||||
}
|
||||
|
||||
Status ReOpenNoDelete() {
|
||||
delete db;
|
||||
db = nullptr;
|
||||
env->AssertNoOpenFile();
|
||||
env->DropUnsyncedFileData();
|
||||
env->ResetState();
|
||||
Status s;
|
||||
if (std::get<0>(GetParam()) == false) {
|
||||
s = TransactionDB::Open(options, txn_db_options, dbname, &db);
|
||||
} else {
|
||||
s = OpenWithStackableDB();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status ReOpen() {
|
||||
delete db;
|
||||
DestroyDB(dbname, options);
|
||||
Status s;
|
||||
if (std::get<0>(GetParam()) == false) {
|
||||
s = TransactionDB::Open(options, txn_db_options, dbname, &db);
|
||||
} else {
|
||||
s = OpenWithStackableDB();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status OpenWithStackableDB() {
|
||||
std::vector<size_t> compaction_enabled_cf_indices;
|
||||
std::vector<ColumnFamilyDescriptor> column_families{ColumnFamilyDescriptor(
|
||||
kDefaultColumnFamilyName, ColumnFamilyOptions(options))};
|
||||
|
||||
TransactionDB::PrepareWrap(&options, &column_families,
|
||||
&compaction_enabled_cf_indices);
|
||||
std::vector<ColumnFamilyHandle*> handles;
|
||||
DB* root_db;
|
||||
Options options_copy(options);
|
||||
Status s =
|
||||
DB::Open(options_copy, dbname, column_families, &handles, &root_db);
|
||||
if (s.ok()) {
|
||||
assert(handles.size() == 1);
|
||||
s = TransactionDB::WrapStackableDB(
|
||||
new StackableDB(root_db), txn_db_options,
|
||||
compaction_enabled_cf_indices, handles, &db);
|
||||
delete handles[0];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
};
|
||||
|
||||
class MySQLStyleTransactionTest : public TransactionTest {};
|
||||
class WritePreparedTransactionTest : public TransactionTest {};
|
||||
|
||||
static const TxnDBWritePolicy wc = WRITE_COMMITTED;
|
||||
static const TxnDBWritePolicy wp = WRITE_PREPARED;
|
||||
// TODO(myabandeh): Instantiate the tests with other write policies
|
||||
INSTANTIATE_TEST_CASE_P(DBAsBaseDB, TransactionTest,
|
||||
::testing::Values(std::make_tuple(false, false, wc)));
|
||||
::testing::Values(std::make_tuple(false, false,
|
||||
WRITE_COMMITTED)));
|
||||
INSTANTIATE_TEST_CASE_P(StackableDBAsBaseDB, TransactionTest,
|
||||
::testing::Values(std::make_tuple(true, false, wc)));
|
||||
INSTANTIATE_TEST_CASE_P(MySQLStyleTransactionTest, MySQLStyleTransactionTest,
|
||||
::testing::Values(std::make_tuple(false, false, wc),
|
||||
std::make_tuple(false, true, wc),
|
||||
std::make_tuple(true, false, wc),
|
||||
std::make_tuple(true, true, wc)));
|
||||
INSTANTIATE_TEST_CASE_P(WritePreparedTransactionTest,
|
||||
WritePreparedTransactionTest,
|
||||
::testing::Values(std::make_tuple(false, true, wp)));
|
||||
::testing::Values(std::make_tuple(true, false,
|
||||
WRITE_COMMITTED)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MySQLStyleTransactionTest, MySQLStyleTransactionTest,
|
||||
::testing::Values(std::make_tuple(false, false, WRITE_COMMITTED),
|
||||
std::make_tuple(false, true, WRITE_COMMITTED),
|
||||
std::make_tuple(true, false, WRITE_COMMITTED),
|
||||
std::make_tuple(true, true, WRITE_COMMITTED)));
|
||||
|
||||
|
||||
TEST_P(TransactionTest, DoubleEmptyWrite) {
|
||||
WriteOptions write_options;
|
||||
@ -4733,138 +4640,6 @@ TEST_P(TransactionTest, MemoryLimitTest) {
|
||||
delete txn;
|
||||
}
|
||||
|
||||
// Test WritePreparedTxnDB's IsInSnapshot against different ordering of
|
||||
// snapshot, max_committed_seq_, prepared, and commit entries.
|
||||
TEST_P(WritePreparedTransactionTest, IsInSnapshotTest) {
|
||||
WriteOptions wo;
|
||||
// Use small commit cache to trigger lots of eviction and fast advance of
|
||||
// max_evicted_seq_
|
||||
// will take effect after ReOpen
|
||||
WritePreparedTxnDB::DEF_COMMIT_CACHE_SIZE = 8;
|
||||
// Same for snapshot cache size
|
||||
WritePreparedTxnDB::DEF_SNAPSHOT_CACHE_SIZE = 5;
|
||||
|
||||
// Take some preliminary snapshots first. This is to stress the data structure
|
||||
// that holds the old snapshots as it will be designed to be efficient when
|
||||
// only a few snapshots are below the max_evicted_seq_.
|
||||
for (int max_snapshots = 1; max_snapshots < 20; max_snapshots++) {
|
||||
// Leave some gap between the preliminary snapshots and the final snapshot
|
||||
// that we check. This should test for also different overlapping scnearios
|
||||
// between the last snapshot and the commits.
|
||||
for (int max_gap = 1; max_gap < 10; max_gap++) {
|
||||
// Since we do not actually write to db, we mock the seq as it would be
|
||||
// increaased by the db. The only exception is that we need db seq to
|
||||
// advance for our snapshots. for which we apply a dummy put each time we
|
||||
// increase our mock of seq.
|
||||
uint64_t seq = 0;
|
||||
// At each step we prepare a txn and then we commit it in the next txn.
|
||||
// This emulates the consecuitive transactions that write to the same key
|
||||
uint64_t cur_txn = 0;
|
||||
// Number of snapshots taken so far
|
||||
int num_snapshots = 0;
|
||||
std::vector<const Snapshot*> to_be_released;
|
||||
// Number of gaps applied so far
|
||||
int gap_cnt = 0;
|
||||
// The final snapshot that we will inspect
|
||||
uint64_t snapshot = 0;
|
||||
bool found_committed = false;
|
||||
// To stress the data structure that maintain prepared txns, at each cycle
|
||||
// we add a new prepare txn. These do not mean to be committed for
|
||||
// snapshot inspection.
|
||||
std::set<uint64_t> prepared;
|
||||
// We keep the list of txns comitted before we take the last snaphot.
|
||||
// These should be the only seq numbers that will be found in the snapshot
|
||||
std::set<uint64_t> committed_before;
|
||||
ReOpen(); // to restart the db
|
||||
WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
|
||||
assert(wp_db);
|
||||
assert(wp_db->db_impl_);
|
||||
// We continue until max advances a bit beyond the snapshot.
|
||||
while (!snapshot || wp_db->max_evicted_seq_ < snapshot + 100) {
|
||||
// do prepare for a transaction
|
||||
wp_db->db_impl_->Put(wo, "key", "value"); // dummy put to inc db seq
|
||||
seq++;
|
||||
ASSERT_EQ(wp_db->db_impl_->GetLatestSequenceNumber(), seq);
|
||||
wp_db->AddPrepared(seq);
|
||||
prepared.insert(seq);
|
||||
|
||||
// If cur_txn is not started, do prepare for it.
|
||||
if (!cur_txn) {
|
||||
wp_db->db_impl_->Put(wo, "key", "value"); // dummy put to inc db seq
|
||||
seq++;
|
||||
ASSERT_EQ(wp_db->db_impl_->GetLatestSequenceNumber(), seq);
|
||||
cur_txn = seq;
|
||||
wp_db->AddPrepared(cur_txn);
|
||||
} else { // else commit it
|
||||
wp_db->db_impl_->Put(wo, "key", "value"); // dummy put to inc db seq
|
||||
seq++;
|
||||
ASSERT_EQ(wp_db->db_impl_->GetLatestSequenceNumber(), seq);
|
||||
wp_db->AddCommitted(cur_txn, seq);
|
||||
if (!snapshot) {
|
||||
committed_before.insert(cur_txn);
|
||||
}
|
||||
cur_txn = 0;
|
||||
}
|
||||
|
||||
if (num_snapshots < max_snapshots - 1) {
|
||||
// Take preliminary snapshots
|
||||
auto tmp_snapshot = db->GetSnapshot();
|
||||
to_be_released.push_back(tmp_snapshot);
|
||||
num_snapshots++;
|
||||
} else if (gap_cnt < max_gap) {
|
||||
// Wait for some gap before taking the final snapshot
|
||||
gap_cnt++;
|
||||
} else if (!snapshot) {
|
||||
// Take the final snapshot if it is not already taken
|
||||
auto tmp_snapshot = db->GetSnapshot();
|
||||
to_be_released.push_back(tmp_snapshot);
|
||||
snapshot = tmp_snapshot->GetSequenceNumber();
|
||||
// We increase the db seq artificailly by a dummy Put. Check that this
|
||||
// technique is effective and db seq is that same as ours.
|
||||
ASSERT_EQ(snapshot, seq);
|
||||
num_snapshots++;
|
||||
}
|
||||
|
||||
// If the snapshot is taken, verify seq numbers visible to it. We redo
|
||||
// it at each cycle to test that the system is still sound when
|
||||
// max_evicted_seq_ advances.
|
||||
if (snapshot) {
|
||||
for (uint64_t s = 0; s <= seq; s++) {
|
||||
bool was_committed =
|
||||
(committed_before.find(s) != committed_before.end());
|
||||
bool is_in_snapshot = wp_db->IsInSnapshot(s, snapshot);
|
||||
if (was_committed != is_in_snapshot) {
|
||||
printf("max_snapshots %d max_gap %d seq %" PRIu64 " max %" PRIu64
|
||||
" snapshot %" PRIu64
|
||||
" gap_cnt %d num_snapshots %d s %" PRIu64 "\n",
|
||||
max_snapshots, max_gap, seq,
|
||||
wp_db->max_evicted_seq_.load(), snapshot, gap_cnt,
|
||||
num_snapshots, s);
|
||||
}
|
||||
ASSERT_EQ(was_committed, is_in_snapshot);
|
||||
found_committed = found_committed || is_in_snapshot;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Safety check to make sure the test actually ran
|
||||
ASSERT_TRUE(found_committed);
|
||||
// As an extra check, check if prepared set will be properly empty after
|
||||
// they are committed.
|
||||
if (cur_txn) {
|
||||
wp_db->AddCommitted(cur_txn, seq);
|
||||
}
|
||||
for (auto p : prepared) {
|
||||
wp_db->AddCommitted(p, seq);
|
||||
}
|
||||
ASSERT_TRUE(wp_db->delayed_prepared_.empty());
|
||||
ASSERT_TRUE(wp_db->prepared_txns_.empty());
|
||||
for (auto s : to_be_released) {
|
||||
db->ReleaseSnapshot(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
131
utilities/transactions/transaction_test.h
Normal file
131
utilities/transactions/transaction_test.h
Normal file
@ -0,0 +1,131 @@
|
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#endif
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "db/db_impl.h"
|
||||
#include "rocksdb/db.h"
|
||||
#include "rocksdb/options.h"
|
||||
#include "rocksdb/utilities/transaction.h"
|
||||
#include "rocksdb/utilities/transaction_db.h"
|
||||
#include "table/mock_table.h"
|
||||
#include "util/fault_injection_test_env.h"
|
||||
#include "util/random.h"
|
||||
#include "util/string_util.h"
|
||||
#include "util/sync_point.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
#include "util/transaction_test_util.h"
|
||||
#include "utilities/merge_operators.h"
|
||||
#include "utilities/merge_operators/string_append/stringappend.h"
|
||||
#include "utilities/transactions/pessimistic_transaction_db.h"
|
||||
|
||||
#include "port/port.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
class TransactionTest : public ::testing::TestWithParam<
|
||||
std::tuple<bool, bool, TxnDBWritePolicy>> {
|
||||
public:
|
||||
TransactionDB* db;
|
||||
FaultInjectionTestEnv* env;
|
||||
std::string dbname;
|
||||
Options options;
|
||||
|
||||
TransactionDBOptions txn_db_options;
|
||||
|
||||
TransactionTest() {
|
||||
options.create_if_missing = true;
|
||||
options.max_write_buffer_number = 2;
|
||||
options.write_buffer_size = 4 * 1024;
|
||||
options.level0_file_num_compaction_trigger = 2;
|
||||
options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
|
||||
env = new FaultInjectionTestEnv(Env::Default());
|
||||
options.env = env;
|
||||
options.concurrent_prepare = std::get<1>(GetParam());
|
||||
dbname = test::TmpDir() + "/transaction_testdb";
|
||||
|
||||
DestroyDB(dbname, options);
|
||||
txn_db_options.transaction_lock_timeout = 0;
|
||||
txn_db_options.default_lock_timeout = 0;
|
||||
txn_db_options.write_policy = std::get<2>(GetParam());
|
||||
Status s;
|
||||
if (std::get<0>(GetParam()) == false) {
|
||||
s = TransactionDB::Open(options, txn_db_options, dbname, &db);
|
||||
} else {
|
||||
s = OpenWithStackableDB();
|
||||
}
|
||||
assert(s.ok());
|
||||
}
|
||||
|
||||
~TransactionTest() {
|
||||
delete db;
|
||||
DestroyDB(dbname, options);
|
||||
delete env;
|
||||
}
|
||||
|
||||
Status ReOpenNoDelete() {
|
||||
delete db;
|
||||
db = nullptr;
|
||||
env->AssertNoOpenFile();
|
||||
env->DropUnsyncedFileData();
|
||||
env->ResetState();
|
||||
Status s;
|
||||
if (std::get<0>(GetParam()) == false) {
|
||||
s = TransactionDB::Open(options, txn_db_options, dbname, &db);
|
||||
} else {
|
||||
s = OpenWithStackableDB();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status ReOpen() {
|
||||
delete db;
|
||||
DestroyDB(dbname, options);
|
||||
Status s;
|
||||
if (std::get<0>(GetParam()) == false) {
|
||||
s = TransactionDB::Open(options, txn_db_options, dbname, &db);
|
||||
} else {
|
||||
s = OpenWithStackableDB();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status OpenWithStackableDB() {
|
||||
std::vector<size_t> compaction_enabled_cf_indices;
|
||||
std::vector<ColumnFamilyDescriptor> column_families{ColumnFamilyDescriptor(
|
||||
kDefaultColumnFamilyName, ColumnFamilyOptions(options))};
|
||||
|
||||
TransactionDB::PrepareWrap(&options, &column_families,
|
||||
&compaction_enabled_cf_indices);
|
||||
std::vector<ColumnFamilyHandle*> handles;
|
||||
DB* root_db;
|
||||
Options options_copy(options);
|
||||
Status s =
|
||||
DB::Open(options_copy, dbname, column_families, &handles, &root_db);
|
||||
if (s.ok()) {
|
||||
assert(handles.size() == 1);
|
||||
s = TransactionDB::WrapStackableDB(
|
||||
new StackableDB(root_db), txn_db_options,
|
||||
compaction_enabled_cf_indices, handles, &db);
|
||||
delete handles[0];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
};
|
||||
|
||||
class MySQLStyleTransactionTest : public TransactionTest {};
|
||||
|
||||
} // namespace rocksdb
|
569
utilities/transactions/write_prepared_transaction_test.cc
Normal file
569
utilities/transactions/write_prepared_transaction_test.cc
Normal file
@ -0,0 +1,569 @@
|
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#endif
|
||||
|
||||
#include "utilities/transactions/transaction_test.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "db/db_impl.h"
|
||||
#include "rocksdb/db.h"
|
||||
#include "rocksdb/options.h"
|
||||
#include "rocksdb/utilities/transaction.h"
|
||||
#include "rocksdb/utilities/transaction_db.h"
|
||||
#include "table/mock_table.h"
|
||||
#include "util/fault_injection_test_env.h"
|
||||
#include "util/random.h"
|
||||
#include "util/string_util.h"
|
||||
#include "util/sync_point.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
#include "util/transaction_test_util.h"
|
||||
#include "utilities/merge_operators.h"
|
||||
#include "utilities/merge_operators/string_append/stringappend.h"
|
||||
#include "utilities/transactions/pessimistic_transaction_db.h"
|
||||
|
||||
#include "port/port.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
using CommitEntry = PessimisticTransactionDB::CommitEntry;
|
||||
|
||||
TEST(PreparedHeap, BasicsTest) {
|
||||
WritePreparedTxnDB::PreparedHeap heap;
|
||||
heap.push(14l);
|
||||
// Test with one element
|
||||
ASSERT_EQ(14l, heap.top());
|
||||
heap.push(24l);
|
||||
heap.push(34l);
|
||||
// Test that old min is still on top
|
||||
ASSERT_EQ(14l, heap.top());
|
||||
heap.push(13l);
|
||||
// Test that the new min will be on top
|
||||
ASSERT_EQ(13l, heap.top());
|
||||
// Test that it is persistent
|
||||
ASSERT_EQ(13l, heap.top());
|
||||
heap.push(44l);
|
||||
heap.push(54l);
|
||||
heap.push(64l);
|
||||
heap.push(74l);
|
||||
heap.push(84l);
|
||||
// Test that old min is still on top
|
||||
ASSERT_EQ(13l, heap.top());
|
||||
heap.erase(24l);
|
||||
// Test that old min is still on top
|
||||
ASSERT_EQ(13l, heap.top());
|
||||
heap.erase(14l);
|
||||
// Test that old min is still on top
|
||||
ASSERT_EQ(13l, heap.top());
|
||||
heap.erase(13l);
|
||||
// Test that the new comes to the top after multiple erase
|
||||
ASSERT_EQ(34l, heap.top());
|
||||
heap.erase(34l);
|
||||
// Test that the new comes to the top after single erase
|
||||
ASSERT_EQ(44l, heap.top());
|
||||
heap.erase(54l);
|
||||
ASSERT_EQ(44l, heap.top());
|
||||
heap.pop(); // pop 44l
|
||||
// Test that the erased items are ignored after pop
|
||||
ASSERT_EQ(64l, heap.top());
|
||||
heap.erase(44l);
|
||||
// Test that erasing an already popped item would work
|
||||
ASSERT_EQ(64l, heap.top());
|
||||
heap.erase(84l);
|
||||
ASSERT_EQ(64l, heap.top());
|
||||
heap.push(85l);
|
||||
heap.push(86l);
|
||||
heap.push(87l);
|
||||
heap.push(88l);
|
||||
heap.push(89l);
|
||||
heap.erase(87l);
|
||||
heap.erase(85l);
|
||||
heap.erase(89l);
|
||||
heap.erase(86l);
|
||||
heap.erase(88l);
|
||||
// Test top remians the same after a ranodm order of many erases
|
||||
ASSERT_EQ(64l, heap.top());
|
||||
heap.pop();
|
||||
// Test that pop works with a series of random pending erases
|
||||
ASSERT_EQ(74l, heap.top());
|
||||
ASSERT_FALSE(heap.empty());
|
||||
heap.pop();
|
||||
// Test that empty works
|
||||
ASSERT_TRUE(heap.empty());
|
||||
}
|
||||
|
||||
class WritePreparedTransactionTest : public TransactionTest {
|
||||
protected:
|
||||
// If expect_update is set, check if it actually updated old_commit_map_. If
|
||||
// it did not and yet suggested not to check the next snapshot, do the
|
||||
// opposite to check if it was not a bad suggstion.
|
||||
void MaybeUpdateOldCommitMapTestWithNext(uint64_t prepare, uint64_t commit,
|
||||
uint64_t snapshot,
|
||||
uint64_t next_snapshot,
|
||||
bool expect_update) {
|
||||
WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
|
||||
// reset old_commit_map_empty_ so that its value indicate whether
|
||||
// old_commit_map_ was updated
|
||||
wp_db->old_commit_map_empty_ = true;
|
||||
bool check_next = wp_db->MaybeUpdateOldCommitMap(prepare, commit, snapshot,
|
||||
snapshot < next_snapshot);
|
||||
if (expect_update == wp_db->old_commit_map_empty_) {
|
||||
printf("prepare: %" PRIu64 " commit: %" PRIu64 " snapshot: %" PRIu64
|
||||
" next: %" PRIu64 "\n",
|
||||
prepare, commit, snapshot, next_snapshot);
|
||||
}
|
||||
EXPECT_EQ(!expect_update, wp_db->old_commit_map_empty_);
|
||||
if (!check_next && wp_db->old_commit_map_empty_) {
|
||||
// do the oppotisite to make sure it was not a bad suggestion
|
||||
const bool dont_care_bool = true;
|
||||
wp_db->MaybeUpdateOldCommitMap(prepare, commit, next_snapshot,
|
||||
dont_care_bool);
|
||||
if (!wp_db->old_commit_map_empty_) {
|
||||
printf("prepare: %" PRIu64 " commit: %" PRIu64 " snapshot: %" PRIu64
|
||||
" next: %" PRIu64 "\n",
|
||||
prepare, commit, snapshot, next_snapshot);
|
||||
}
|
||||
EXPECT_TRUE(wp_db->old_commit_map_empty_);
|
||||
}
|
||||
}
|
||||
|
||||
// Test that a CheckAgainstSnapshots thread reading old_snapshots will not
|
||||
// miss a snapshot because of a concurrent update by UpdateSnapshots that is
|
||||
// writing new_snapshots. Both threads are broken at two points. The sync
|
||||
// points to enforce them are specified by a1, a2, b1, and b2. CommitEntry
|
||||
// entry is expected to be vital for one of the snapshots that is common
|
||||
// between the old and new list of snapshots.
|
||||
void SnapshotConcurrentAccessTestInternal(
|
||||
WritePreparedTxnDB* wp_db,
|
||||
const std::vector<SequenceNumber>& old_snapshots,
|
||||
const std::vector<SequenceNumber>& new_snapshots, CommitEntry& entry,
|
||||
SequenceNumber& version, size_t a1, size_t a2, size_t b1, size_t b2) {
|
||||
// First reset the snapshot list
|
||||
const std::vector<SequenceNumber> empty_snapshots;
|
||||
wp_db->old_commit_map_empty_ = true;
|
||||
wp_db->UpdateSnapshots(empty_snapshots, ++version);
|
||||
// Then initialize it with the old_snapshots
|
||||
wp_db->UpdateSnapshots(old_snapshots, ++version);
|
||||
|
||||
// Starting from the first thread, cut each thread at two points
|
||||
rocksdb::SyncPoint::GetInstance()->LoadDependency({
|
||||
{"WritePreparedTxnDB::CheckAgainstSnapshots:p:" + std::to_string(a1),
|
||||
"WritePreparedTxnDB::UpdateSnapshots:s:start"},
|
||||
{"WritePreparedTxnDB::UpdateSnapshots:p:" + std::to_string(b1),
|
||||
"WritePreparedTxnDB::CheckAgainstSnapshots:s:" + std::to_string(a1)},
|
||||
{"WritePreparedTxnDB::CheckAgainstSnapshots:p:" + std::to_string(a2),
|
||||
"WritePreparedTxnDB::UpdateSnapshots:s:" + std::to_string(b1)},
|
||||
{"WritePreparedTxnDB::UpdateSnapshots:p:" + std::to_string(b2),
|
||||
"WritePreparedTxnDB::CheckAgainstSnapshots:s:" + std::to_string(a2)},
|
||||
{"WritePreparedTxnDB::CheckAgainstSnapshots:p:end",
|
||||
"WritePreparedTxnDB::UpdateSnapshots:s:" + std::to_string(b2)},
|
||||
});
|
||||
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||
{
|
||||
ASSERT_TRUE(wp_db->old_commit_map_empty_);
|
||||
rocksdb::port::Thread t1(
|
||||
[&]() { wp_db->UpdateSnapshots(new_snapshots, version); });
|
||||
rocksdb::port::Thread t2([&]() { wp_db->CheckAgainstSnapshots(entry); });
|
||||
t1.join();
|
||||
t2.join();
|
||||
ASSERT_FALSE(wp_db->old_commit_map_empty_);
|
||||
}
|
||||
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||
|
||||
wp_db->old_commit_map_empty_ = true;
|
||||
wp_db->UpdateSnapshots(empty_snapshots, ++version);
|
||||
wp_db->UpdateSnapshots(old_snapshots, ++version);
|
||||
// Starting from the second thread, cut each thread at two points
|
||||
rocksdb::SyncPoint::GetInstance()->LoadDependency({
|
||||
{"WritePreparedTxnDB::UpdateSnapshots:p:" + std::to_string(a1),
|
||||
"WritePreparedTxnDB::CheckAgainstSnapshots:s:start"},
|
||||
{"WritePreparedTxnDB::CheckAgainstSnapshots:p:" + std::to_string(b1),
|
||||
"WritePreparedTxnDB::UpdateSnapshots:s:" + std::to_string(a1)},
|
||||
{"WritePreparedTxnDB::UpdateSnapshots:p:" + std::to_string(a2),
|
||||
"WritePreparedTxnDB::CheckAgainstSnapshots:s:" + std::to_string(b1)},
|
||||
{"WritePreparedTxnDB::CheckAgainstSnapshots:p:" + std::to_string(b2),
|
||||
"WritePreparedTxnDB::UpdateSnapshots:s:" + std::to_string(a2)},
|
||||
{"WritePreparedTxnDB::UpdateSnapshots:p:end",
|
||||
"WritePreparedTxnDB::CheckAgainstSnapshots:s:" + std::to_string(b2)},
|
||||
});
|
||||
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||
{
|
||||
ASSERT_TRUE(wp_db->old_commit_map_empty_);
|
||||
rocksdb::port::Thread t1(
|
||||
[&]() { wp_db->UpdateSnapshots(new_snapshots, version); });
|
||||
rocksdb::port::Thread t2([&]() { wp_db->CheckAgainstSnapshots(entry); });
|
||||
t1.join();
|
||||
t2.join();
|
||||
ASSERT_FALSE(wp_db->old_commit_map_empty_);
|
||||
}
|
||||
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||
}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(WritePreparedTransactionTest,
|
||||
WritePreparedTransactionTest,
|
||||
::testing::Values(std::make_tuple(false, true,
|
||||
WRITE_PREPARED)));
|
||||
|
||||
TEST_P(WritePreparedTransactionTest, CommitMapTest) {
|
||||
WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
|
||||
assert(wp_db);
|
||||
assert(wp_db->db_impl_);
|
||||
size_t size = wp_db->COMMIT_CACHE_SIZE;
|
||||
CommitEntry c = {5, 12}, e;
|
||||
bool evicted = wp_db->AddCommitEntry(c.prep_seq % size, c, &e);
|
||||
ASSERT_FALSE(evicted);
|
||||
|
||||
// Should be able to read the same value
|
||||
bool found = wp_db->GetCommitEntry(c.prep_seq % size, &e);
|
||||
ASSERT_TRUE(found);
|
||||
ASSERT_EQ(c, e);
|
||||
// Should be able to distinguish between overlapping entries
|
||||
found = wp_db->GetCommitEntry((c.prep_seq + size) % size, &e);
|
||||
ASSERT_TRUE(found);
|
||||
ASSERT_NE(c.prep_seq + size, e.prep_seq);
|
||||
// Should be able to detect non-existent entry
|
||||
found = wp_db->GetCommitEntry((c.prep_seq + 1) % size, &e);
|
||||
ASSERT_EQ(e.commit_seq, 0);
|
||||
ASSERT_FALSE(found);
|
||||
|
||||
// Reject an invalid exchange
|
||||
CommitEntry e2 = {c.prep_seq + size, c.commit_seq};
|
||||
bool exchanged = wp_db->ExchangeCommitEntry(e2.prep_seq % size, e2, e);
|
||||
ASSERT_FALSE(exchanged);
|
||||
// check whether it did actually reject that
|
||||
found = wp_db->GetCommitEntry(e2.prep_seq % size, &e);
|
||||
ASSERT_TRUE(found);
|
||||
ASSERT_EQ(c, e);
|
||||
|
||||
// Accept a valid exchange
|
||||
CommitEntry e3 = {c.prep_seq + size, c.commit_seq + size + 1};
|
||||
exchanged = wp_db->ExchangeCommitEntry(c.prep_seq % size, c, e3);
|
||||
ASSERT_TRUE(exchanged);
|
||||
// check whether it did actually accepted that
|
||||
found = wp_db->GetCommitEntry(c.prep_seq % size, &e);
|
||||
ASSERT_TRUE(found);
|
||||
ASSERT_EQ(e3, e);
|
||||
|
||||
// Rewrite an entry
|
||||
CommitEntry e4 = {e3.prep_seq + size, e3.commit_seq + size + 1};
|
||||
evicted = wp_db->AddCommitEntry(e4.prep_seq % size, e4, &e);
|
||||
ASSERT_TRUE(evicted);
|
||||
ASSERT_EQ(e3, e);
|
||||
found = wp_db->GetCommitEntry(e4.prep_seq % size, &e);
|
||||
ASSERT_TRUE(found);
|
||||
ASSERT_EQ(e4, e);
|
||||
}
|
||||
|
||||
TEST_P(WritePreparedTransactionTest, MaybeUpdateOldCommitMap) {
|
||||
// If prepare <= snapshot < commit we should keep the entry around since its
|
||||
// nonexistence could be interpreted as committed in the snapshot while it is
|
||||
// not true. We keep such entries around by adding them to the
|
||||
// old_commit_map_.
|
||||
uint64_t p /*prepare*/, c /*commit*/, s /*snapshot*/, ns /*next_snapshot*/;
|
||||
p = 10l, c = 15l, s = 20l, ns = 21l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
|
||||
// If we do not expect the old commit map to be updated, try also with a next
|
||||
// snapshot that is expected to update the old commit map. This would test
|
||||
// that MaybeUpdateOldCommitMap would not prevent us from checking the next
|
||||
// snapshot that must be checked.
|
||||
p = 10l, c = 15l, s = 20l, ns = 11l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
|
||||
|
||||
p = 10l, c = 20l, s = 20l, ns = 19l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
|
||||
p = 10l, c = 20l, s = 20l, ns = 21l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
|
||||
|
||||
p = 20l, c = 20l, s = 20l, ns = 21l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
|
||||
p = 20l, c = 20l, s = 20l, ns = 19l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
|
||||
|
||||
p = 10l, c = 25l, s = 20l, ns = 21l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, true);
|
||||
|
||||
p = 20l, c = 25l, s = 20l, ns = 21l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, true);
|
||||
|
||||
p = 21l, c = 25l, s = 20l, ns = 22l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
|
||||
p = 21l, c = 25l, s = 20l, ns = 19l;
|
||||
MaybeUpdateOldCommitMapTestWithNext(p, c, s, ns, false);
|
||||
}
|
||||
|
||||
TEST_P(WritePreparedTransactionTest, CheckAgainstSnapshotsTest) {
|
||||
std::vector<SequenceNumber> snapshots = {100l, 200l, 300l, 400l,
|
||||
500l, 600l, 700l};
|
||||
// will take effect after ReOpen
|
||||
WritePreparedTxnDB::DEF_SNAPSHOT_CACHE_SIZE = snapshots.size() / 2;
|
||||
ReOpen(); // to restart the db
|
||||
WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
|
||||
assert(wp_db);
|
||||
assert(wp_db->db_impl_);
|
||||
SequenceNumber version = 1000l;
|
||||
ASSERT_EQ(0, wp_db->snapshots_total_);
|
||||
wp_db->UpdateSnapshots(snapshots, version);
|
||||
ASSERT_EQ(snapshots.size(), wp_db->snapshots_total_);
|
||||
// seq numbers are chosen so that we have two of them between each two
|
||||
// snapshots. If the diff of two consecuitive seq is more than 5, there is a
|
||||
// snapshot between them.
|
||||
std::vector<SequenceNumber> seqs = {50l, 55l, 150l, 155l, 250l, 255l,
|
||||
350l, 355l, 450l, 455l, 550l, 555l,
|
||||
650l, 655l, 750l, 755l};
|
||||
assert(seqs.size() > 1);
|
||||
for (size_t i = 0; i < seqs.size() - 1; i++) {
|
||||
wp_db->old_commit_map_empty_ = true; // reset
|
||||
CommitEntry commit_entry = {seqs[i], seqs[i + 1]};
|
||||
wp_db->CheckAgainstSnapshots(commit_entry);
|
||||
// Expect update if there is snapshot in between the prepare and commit
|
||||
bool expect_update = commit_entry.commit_seq - commit_entry.prep_seq > 5 &&
|
||||
commit_entry.commit_seq >= snapshots.front() &&
|
||||
commit_entry.prep_seq <= snapshots.back();
|
||||
ASSERT_EQ(expect_update, !wp_db->old_commit_map_empty_);
|
||||
}
|
||||
}
|
||||
|
||||
// Return true if the ith bit is set in combination represented by comb
|
||||
bool IsInCombination(size_t i, size_t comb) { return comb & (1 << i); }
|
||||
|
||||
// Test that CheckAgainstSnapshots will not miss a live snapshot if it is run in
|
||||
// parallel with UpdateSnapshots.
|
||||
TEST_P(WritePreparedTransactionTest, SnapshotConcurrentAccessTest) {
|
||||
// We have a sync point in the method under test after checking each snapshot.
|
||||
// If you increase the max number of snapshots in this test, more sync points
|
||||
// in the methods must also be added.
|
||||
const std::vector<SequenceNumber> snapshots = {10l, 20l, 30l, 40l, 50l,
|
||||
60l, 70l, 80l, 90l, 100l};
|
||||
SequenceNumber version = 1000l;
|
||||
// Choose the cache size so that the new snapshot list could replace all the
|
||||
// existing items in the cache and also have some overflow Will take effect
|
||||
// after ReOpen
|
||||
WritePreparedTxnDB::DEF_SNAPSHOT_CACHE_SIZE = (snapshots.size() - 2) / 2;
|
||||
ReOpen(); // to restart the db
|
||||
WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
|
||||
assert(wp_db);
|
||||
assert(wp_db->db_impl_);
|
||||
// Add up to 2 items that do not fit into the cache
|
||||
for (size_t old_size = 1;
|
||||
old_size <= WritePreparedTxnDB::DEF_SNAPSHOT_CACHE_SIZE + 2;
|
||||
old_size++) {
|
||||
const std::vector<SequenceNumber> old_snapshots(
|
||||
snapshots.begin(), snapshots.begin() + old_size);
|
||||
|
||||
// Each member of old snapshot might or might not appear in the new list. We
|
||||
// create a common_snapshots for each combination.
|
||||
size_t new_comb_cnt = static_cast<size_t>(1 << old_size);
|
||||
for (size_t new_comb = 0; new_comb < new_comb_cnt; new_comb++) {
|
||||
std::vector<SequenceNumber> common_snapshots;
|
||||
for (size_t i = 0; i < old_snapshots.size(); i++) {
|
||||
if (IsInCombination(i, new_comb)) {
|
||||
common_snapshots.push_back(old_snapshots[i]);
|
||||
}
|
||||
}
|
||||
// And add some new snapshots to the common list
|
||||
for (size_t added_snapshots = 0;
|
||||
added_snapshots <= snapshots.size() - old_snapshots.size();
|
||||
added_snapshots++) {
|
||||
std::vector<SequenceNumber> new_snapshots = common_snapshots;
|
||||
for (size_t i = 0; i < added_snapshots; i++) {
|
||||
new_snapshots.push_back(snapshots[old_snapshots.size() + i]);
|
||||
}
|
||||
for (auto it = common_snapshots.begin(); it != common_snapshots.end();
|
||||
it++) {
|
||||
auto snapshot = *it;
|
||||
// Create a commit entry that is around the snapshot and thus should
|
||||
// be not be discarded
|
||||
CommitEntry entry = {static_cast<uint64_t>(snapshot - 1),
|
||||
snapshot + 1};
|
||||
// The critical part is when iterating the snapshot cache. Afterwards,
|
||||
// we are operating under the lock
|
||||
size_t a_range =
|
||||
std::min(old_snapshots.size(),
|
||||
WritePreparedTxnDB::DEF_SNAPSHOT_CACHE_SIZE) +
|
||||
1;
|
||||
size_t b_range =
|
||||
std::min(new_snapshots.size(),
|
||||
WritePreparedTxnDB::DEF_SNAPSHOT_CACHE_SIZE) +
|
||||
1;
|
||||
// Break each thread at two points
|
||||
for (size_t a1 = 1; a1 <= a_range; a1++) {
|
||||
for (size_t a2 = a1 + 1; a2 <= a_range; a2++) {
|
||||
for (size_t b1 = 1; b1 <= b_range; b1++) {
|
||||
for (size_t b2 = b1 + 1; b2 <= b_range; b2++) {
|
||||
SnapshotConcurrentAccessTestInternal(wp_db, old_snapshots,
|
||||
new_snapshots, entry,
|
||||
version, a1, a2, b1, b2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test WritePreparedTxnDB's IsInSnapshot against different ordering of
|
||||
// snapshot, max_committed_seq_, prepared, and commit entries.
|
||||
TEST_P(WritePreparedTransactionTest, IsInSnapshotTest) {
|
||||
WriteOptions wo;
|
||||
// Use small commit cache to trigger lots of eviction and fast advance of
|
||||
// max_evicted_seq_
|
||||
// will take effect after ReOpen
|
||||
WritePreparedTxnDB::DEF_COMMIT_CACHE_SIZE = 8;
|
||||
// Same for snapshot cache size
|
||||
WritePreparedTxnDB::DEF_SNAPSHOT_CACHE_SIZE = 5;
|
||||
|
||||
// Take some preliminary snapshots first. This is to stress the data structure
|
||||
// that holds the old snapshots as it will be designed to be efficient when
|
||||
// only a few snapshots are below the max_evicted_seq_.
|
||||
for (int max_snapshots = 1; max_snapshots < 20; max_snapshots++) {
|
||||
// Leave some gap between the preliminary snapshots and the final snapshot
|
||||
// that we check. This should test for also different overlapping scnearios
|
||||
// between the last snapshot and the commits.
|
||||
for (int max_gap = 1; max_gap < 10; max_gap++) {
|
||||
// Since we do not actually write to db, we mock the seq as it would be
|
||||
// increaased by the db. The only exception is that we need db seq to
|
||||
// advance for our snapshots. for which we apply a dummy put each time we
|
||||
// increase our mock of seq.
|
||||
uint64_t seq = 0;
|
||||
// At each step we prepare a txn and then we commit it in the next txn.
|
||||
// This emulates the consecuitive transactions that write to the same key
|
||||
uint64_t cur_txn = 0;
|
||||
// Number of snapshots taken so far
|
||||
int num_snapshots = 0;
|
||||
std::vector<const Snapshot*> to_be_released;
|
||||
// Number of gaps applied so far
|
||||
int gap_cnt = 0;
|
||||
// The final snapshot that we will inspect
|
||||
uint64_t snapshot = 0;
|
||||
bool found_committed = false;
|
||||
// To stress the data structure that maintain prepared txns, at each cycle
|
||||
// we add a new prepare txn. These do not mean to be committed for
|
||||
// snapshot inspection.
|
||||
std::set<uint64_t> prepared;
|
||||
// We keep the list of txns comitted before we take the last snaphot.
|
||||
// These should be the only seq numbers that will be found in the snapshot
|
||||
std::set<uint64_t> committed_before;
|
||||
ReOpen(); // to restart the db
|
||||
WritePreparedTxnDB* wp_db = dynamic_cast<WritePreparedTxnDB*>(db);
|
||||
assert(wp_db);
|
||||
assert(wp_db->db_impl_);
|
||||
// We continue until max advances a bit beyond the snapshot.
|
||||
while (!snapshot || wp_db->max_evicted_seq_ < snapshot + 100) {
|
||||
// do prepare for a transaction
|
||||
wp_db->db_impl_->Put(wo, "key", "value"); // dummy put to inc db seq
|
||||
seq++;
|
||||
ASSERT_EQ(wp_db->db_impl_->GetLatestSequenceNumber(), seq);
|
||||
wp_db->AddPrepared(seq);
|
||||
prepared.insert(seq);
|
||||
|
||||
// If cur_txn is not started, do prepare for it.
|
||||
if (!cur_txn) {
|
||||
wp_db->db_impl_->Put(wo, "key", "value"); // dummy put to inc db seq
|
||||
seq++;
|
||||
ASSERT_EQ(wp_db->db_impl_->GetLatestSequenceNumber(), seq);
|
||||
cur_txn = seq;
|
||||
wp_db->AddPrepared(cur_txn);
|
||||
} else { // else commit it
|
||||
wp_db->db_impl_->Put(wo, "key", "value"); // dummy put to inc db seq
|
||||
seq++;
|
||||
ASSERT_EQ(wp_db->db_impl_->GetLatestSequenceNumber(), seq);
|
||||
wp_db->AddCommitted(cur_txn, seq);
|
||||
if (!snapshot) {
|
||||
committed_before.insert(cur_txn);
|
||||
}
|
||||
cur_txn = 0;
|
||||
}
|
||||
|
||||
if (num_snapshots < max_snapshots - 1) {
|
||||
// Take preliminary snapshots
|
||||
auto tmp_snapshot = db->GetSnapshot();
|
||||
to_be_released.push_back(tmp_snapshot);
|
||||
num_snapshots++;
|
||||
} else if (gap_cnt < max_gap) {
|
||||
// Wait for some gap before taking the final snapshot
|
||||
gap_cnt++;
|
||||
} else if (!snapshot) {
|
||||
// Take the final snapshot if it is not already taken
|
||||
auto tmp_snapshot = db->GetSnapshot();
|
||||
to_be_released.push_back(tmp_snapshot);
|
||||
snapshot = tmp_snapshot->GetSequenceNumber();
|
||||
// We increase the db seq artificailly by a dummy Put. Check that this
|
||||
// technique is effective and db seq is that same as ours.
|
||||
ASSERT_EQ(snapshot, seq);
|
||||
num_snapshots++;
|
||||
}
|
||||
|
||||
// If the snapshot is taken, verify seq numbers visible to it. We redo
|
||||
// it at each cycle to test that the system is still sound when
|
||||
// max_evicted_seq_ advances.
|
||||
if (snapshot) {
|
||||
for (uint64_t s = 0; s <= seq; s++) {
|
||||
bool was_committed =
|
||||
(committed_before.find(s) != committed_before.end());
|
||||
bool is_in_snapshot = wp_db->IsInSnapshot(s, snapshot);
|
||||
if (was_committed != is_in_snapshot) {
|
||||
printf("max_snapshots %d max_gap %d seq %" PRIu64 " max %" PRIu64
|
||||
" snapshot %" PRIu64
|
||||
" gap_cnt %d num_snapshots %d s %" PRIu64 "\n",
|
||||
max_snapshots, max_gap, seq,
|
||||
wp_db->max_evicted_seq_.load(), snapshot, gap_cnt,
|
||||
num_snapshots, s);
|
||||
}
|
||||
ASSERT_EQ(was_committed, is_in_snapshot);
|
||||
found_committed = found_committed || is_in_snapshot;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Safety check to make sure the test actually ran
|
||||
ASSERT_TRUE(found_committed);
|
||||
// As an extra check, check if prepared set will be properly empty after
|
||||
// they are committed.
|
||||
if (cur_txn) {
|
||||
wp_db->AddCommitted(cur_txn, seq);
|
||||
}
|
||||
for (auto p : prepared) {
|
||||
wp_db->AddCommitted(p, seq);
|
||||
}
|
||||
ASSERT_TRUE(wp_db->delayed_prepared_.empty());
|
||||
ASSERT_TRUE(wp_db->prepared_txns_.empty());
|
||||
for (auto s : to_be_released) {
|
||||
db->ReleaseSnapshot(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
#else
|
||||
#include <stdio.h>
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
fprintf(stderr,
|
||||
"SKIPPED as Transactions are not supported in ROCKSDB_LITE\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // ROCKSDB_LITE
|
Loading…
Reference in New Issue
Block a user