Optimize db_stress setup phase (#9475)

Summary:
It is too slow that our `db_crashtest.py` often kills `db_stress` before
the setup phase completes. Profiled it and found a few ways to optimize.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9475

Test Plan:
Measured setup phase time reduced 22% (36 -> 28 seconds) for first run, and
36% (38 -> 24 seconds) for non-first run on empty-ish DB.

- first run benchmark command: `rm -rf /dev/shm/dbstress*/ && mkdir -p /dev/shm/dbstress_expected/ && ./db_stress -max_key=100000000 -destroy_db_initially=1 -expected_values_dir=/dev/shm/dbstress_expected/ -db=/dev/shm/dbstress/ --clear_column_family_one_in=0 --reopen=0 --nooverwritepercent=1`

output before this PR:

```
2022/01/31-11:14:05  Initializing db_stress
...
2022/01/31-11:14:41  Starting database operations
```

output after this PR:

```
...
2022/01/31-11:12:23  Initializing db_stress
...
2022/01/31-11:12:51  Starting database operations
```

- non-first run benchmark command: `./db_stress -max_key=100000000 -destroy_db_initially=0 -expected_values_dir=/dev/shm/dbstress_expected/ -db=/dev/shm/dbstress/ --clear_column_family_one_in=0 --reopen=0 --nooverwritepercent=1`

output before this PR:

```
2022/01/31-11:20:45  Initializing db_stress
...
2022/01/31-11:21:23  Starting database operations
```

output after this PR:

```
2022/01/31-11:22:02  Initializing db_stress
...
2022/01/31-11:22:26  Starting database operations
```

- ran minified crash test a while: `DEBUG_LEVEL=0 TEST_TMPDIR=/dev/shm python3 tools/db_crashtest.py blackbox --simple --interval=10 --max_key=1000000 --write_buffer_size=1048576 --target_file_size_base=1048576 --max_bytes_for_level_base=4194304 --value_size_mult=33`

Reviewed By: anand1976

Differential Revision: D33897793

Pulled By: ajkr

fbshipit-source-id: 0d7b2c93e1e2a9f8a878e87632c2455406313087
This commit is contained in:
Andrew Kryczka 2022-02-01 11:46:12 -08:00 committed by Facebook GitHub Bot
parent a495448eea
commit ed75dddc35
3 changed files with 32 additions and 28 deletions

View File

@ -407,17 +407,15 @@ inline bool GetNextPrefix(const ROCKSDB_NAMESPACE::Slice& src, std::string* v) {
#pragma warning(pop) #pragma warning(pop)
#endif #endif
// convert long to a big-endian slice key // Append `val` to `*key` in fixed-width big-endian format
extern inline std::string GetStringFromInt(int64_t val) { extern inline void AppendIntToString(uint64_t val, std::string* key) {
std::string little_endian_key; // PutFixed64 uses little endian
std::string big_endian_key; PutFixed64(key, val);
PutFixed64(&little_endian_key, val); // Reverse to get big endian
assert(little_endian_key.size() == sizeof(val)); char* int_data = &((*key)[key->size() - sizeof(uint64_t)]);
big_endian_key.resize(sizeof(val)); for (size_t i = 0; i < sizeof(uint64_t) / 2; ++i) {
for (size_t i = 0; i < sizeof(val); ++i) { std::swap(int_data[i], int_data[sizeof(uint64_t) - 1 - i]);
big_endian_key[i] = little_endian_key[sizeof(val) - 1 - i];
} }
return big_endian_key;
} }
// A struct for maintaining the parameters for generating variable length keys // A struct for maintaining the parameters for generating variable length keys
@ -443,13 +441,22 @@ extern inline std::string Key(int64_t val) {
uint64_t window = key_gen_ctx.window; uint64_t window = key_gen_ctx.window;
size_t levels = key_gen_ctx.weights.size(); size_t levels = key_gen_ctx.weights.size();
std::string key; std::string key;
// Over-reserve and for now do not bother `shrink_to_fit()` since the key
// strings are transient.
key.reserve(FLAGS_max_key_len * 8);
uint64_t window_idx = static_cast<uint64_t>(val) / window;
uint64_t offset = static_cast<uint64_t>(val) % window;
for (size_t level = 0; level < levels; ++level) { for (size_t level = 0; level < levels; ++level) {
uint64_t weight = key_gen_ctx.weights[level]; uint64_t weight = key_gen_ctx.weights[level];
uint64_t offset = static_cast<uint64_t>(val) % window; uint64_t pfx;
uint64_t mult = static_cast<uint64_t>(val) / window; if (level == 0) {
uint64_t pfx = mult * weight + (offset >= weight ? weight - 1 : offset); pfx = window_idx * weight;
key.append(GetStringFromInt(pfx)); } else {
pfx = 0;
}
pfx += offset >= weight ? weight - 1 : offset;
AppendIntToString(pfx, &key);
if (offset < weight) { if (offset < weight) {
// Use the bottom 3 bits of offset as the number of trailing 'x's in the // Use the bottom 3 bits of offset as the number of trailing 'x's in the
// key. If the next key is going to be of the next level, then skip the // key. If the next key is going to be of the next level, then skip the
@ -461,8 +468,7 @@ extern inline std::string Key(int64_t val) {
} }
break; break;
} }
val = offset - weight; offset -= weight;
window -= weight;
} }
return key; return key;

View File

@ -158,10 +158,7 @@ class SharedState {
key_locks_.resize(FLAGS_column_families); key_locks_.resize(FLAGS_column_families);
for (int i = 0; i < FLAGS_column_families; ++i) { for (int i = 0; i < FLAGS_column_families; ++i) {
key_locks_[i].resize(num_locks); key_locks_[i].reset(new port::Mutex[num_locks]);
for (auto& ptr : key_locks_[i]) {
ptr.reset(new port::Mutex);
}
} }
#ifndef NDEBUG #ifndef NDEBUG
if (FLAGS_read_fault_one_in) { if (FLAGS_read_fault_one_in) {
@ -227,20 +224,20 @@ class SharedState {
// Returns a lock covering `key` in `cf`. // Returns a lock covering `key` in `cf`.
port::Mutex* GetMutexForKey(int cf, int64_t key) { port::Mutex* GetMutexForKey(int cf, int64_t key) {
return key_locks_[cf][key >> log2_keys_per_lock_].get(); return &key_locks_[cf][key >> log2_keys_per_lock_];
} }
// Acquires locks for all keys in `cf`. // Acquires locks for all keys in `cf`.
void LockColumnFamily(int cf) { void LockColumnFamily(int cf) {
for (auto& mutex : key_locks_[cf]) { for (int i = 0; i < max_key_ >> log2_keys_per_lock_; ++i) {
mutex->Lock(); key_locks_[cf][i].Lock();
} }
} }
// Releases locks for all keys in `cf`. // Releases locks for all keys in `cf`.
void UnlockColumnFamily(int cf) { void UnlockColumnFamily(int cf) {
for (auto& mutex : key_locks_[cf]) { for (int i = 0; i < max_key_ >> log2_keys_per_lock_; ++i) {
mutex->Unlock(); key_locks_[cf][i].Unlock();
} }
} }
@ -361,9 +358,9 @@ class SharedState {
std::unordered_set<size_t> no_overwrite_ids_; std::unordered_set<size_t> no_overwrite_ids_;
std::unique_ptr<ExpectedStateManager> expected_state_manager_; std::unique_ptr<ExpectedStateManager> expected_state_manager_;
// Has to make it owned by a smart ptr as port::Mutex is not copyable // Cannot store `port::Mutex` directly in vector since it is not copyable
// and storing it in the container may require copying depending on the impl. // and storing it in the container may require copying depending on the impl.
std::vector<std::vector<std::unique_ptr<port::Mutex>>> key_locks_; std::vector<std::unique_ptr<port::Mutex[]>> key_locks_;
std::atomic<bool> printing_verification_results_; std::atomic<bool> printing_verification_results_;
}; };

View File

@ -78,7 +78,8 @@ bool ExpectedState::Exists(int cf, int64_t key) {
void ExpectedState::Reset() { void ExpectedState::Reset() {
for (size_t i = 0; i < num_column_families_; ++i) { for (size_t i = 0; i < num_column_families_; ++i) {
for (size_t j = 0; j < max_key_; ++j) { for (size_t j = 0; j < max_key_; ++j) {
Delete(static_cast<int>(i), j, false /* pending */); Value(static_cast<int>(i), j)
.store(SharedState::DELETION_SENTINEL, std::memory_order_relaxed);
} }
} }
} }