From b81b430987f46844b400e933f53f4312e8d0743f Mon Sep 17 00:00:00 2001 From: Nathan Bronson Date: Fri, 6 Nov 2015 08:07:08 -0800 Subject: [PATCH] Switch to thread-local random for skiplist Summary: Using a TLS random instance for skiplist makes it smaller (useful for hash_skiplist_rep) and prepares skiplist for concurrent adds. This diff also modifies the branching factor math to avoid an unnecessary division. This diff has the effect of changing the sequence of skip list node height choices made by tests, so it has the potential to cause unit test failures for tests that implicitly rely on the exact structure of the skip list. Tests that try to exactly trigger a compaction are likely suspects for this problem (these tests have always been brittle to changes in the skiplist details). I've minimizes this risk by reseeding the main thread's Random at the beginning of each test, increasing the universal compaction size_ratio limit from 101% to 105% for some tests, and verifying that the tests pass many times. Test Plan: for i in `seq 0 9`; do make check; done Reviewers: sdong, igor Reviewed By: igor Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D50439 --- CMakeLists.txt | 1 + db/corruption_test.cc | 12 +++++++-- db/db_test_util.cc | 1 + db/db_universal_compaction_test.cc | 3 +++ db/skiplist.h | 22 +++++++++-------- src.mk | 1 + util/random.cc | 39 ++++++++++++++++++++++++++++++ util/random.h | 21 +++++++++++++--- 8 files changed, 85 insertions(+), 15 deletions(-) create mode 100644 util/random.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f625c4d1..808f2a64a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -218,6 +218,7 @@ set(SOURCES util/options_sanity_check.cc util/perf_context.cc util/perf_level.cc + util/random.cc util/rate_limiter.cc util/skiplistrep.cc util/slice.cc diff --git a/db/corruption_test.cc b/db/corruption_test.cc index 81cff970f..475b1bb55 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -232,8 +232,16 @@ class CorruptionTest : public testing::Test { // Return the value to associate with the specified key Slice Value(int k, std::string* storage) { - Random r(k); - return test::RandomString(&r, kValueSize, storage); + if (k == 0) { + // Ugh. Random seed of 0 used to produce no entropy. This code + // preserves the implementation that was in place when all of the + // magic values in this file were picked. + *storage = std::string(kValueSize, ' '); + return Slice(*storage); + } else { + Random r(k); + return test::RandomString(&r, kValueSize, storage); + } } }; diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 2a9e6d3c9..356398871 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -55,6 +55,7 @@ DBTestBase::DBTestBase(const std::string path) EXPECT_OK(DestroyDB(dbname_, options)); db_ = nullptr; Reopen(options); + Random::GetTLSInstance()->Reset(0xdeadbeef); } DBTestBase::~DBTestBase() { diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index ec4ec32b8..99091227d 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -119,6 +119,7 @@ class DelayFilterFactory : public CompactionFilterFactory { TEST_P(DBTestUniversalCompaction, UniversalCompactionTrigger) { Options options; options.compaction_style = kCompactionStyleUniversal; + options.compaction_options_universal.size_ratio = 5; options.num_levels = num_levels_; options.write_buffer_size = 105 << 10; // 105KB options.arena_block_size = 4 << 10; @@ -852,6 +853,7 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionFourPaths) { options.db_paths.emplace_back(dbname_ + "_3", 500 * 1024); options.db_paths.emplace_back(dbname_ + "_4", 1024 * 1024 * 1024); options.compaction_style = kCompactionStyleUniversal; + options.compaction_options_universal.size_ratio = 5; options.write_buffer_size = 110 << 10; // 105KB options.arena_block_size = 4 << 10; options.level0_file_num_compaction_trigger = 2; @@ -1047,6 +1049,7 @@ TEST_P(DBTestUniversalCompaction, UniversalCompactionSecondPathRatio) { options.db_paths.emplace_back(dbname_, 500 * 1024); options.db_paths.emplace_back(dbname_ + "_2", 1024 * 1024 * 1024); options.compaction_style = kCompactionStyleUniversal; + options.compaction_options_universal.size_ratio = 5; options.write_buffer_size = 110 << 10; // 105KB options.arena_block_size = 4 * 1024; options.arena_block_size = 4 << 10; diff --git a/db/skiplist.h b/db/skiplist.h index 787fad59d..b80ecf210 100644 --- a/db/skiplist.h +++ b/db/skiplist.h @@ -107,8 +107,9 @@ class SkipList { }; private: - const int32_t kMaxHeight_; - const int32_t kBranching_; + const uint16_t kMaxHeight_; + const uint16_t kBranching_; + const uint32_t kScaledInverseBranching_; // Immutable after construction Comparator const compare_; @@ -131,9 +132,6 @@ class SkipList { return max_height_.load(std::memory_order_relaxed); } - // Read/written only by Insert(). - Random rnd_; - Node* NewNode(const Key& key, int height); int RandomHeight(); bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } @@ -264,9 +262,11 @@ inline void SkipList::Iterator::SeekToLast() { template int SkipList::RandomHeight() { + auto rnd = Random::GetTLSInstance(); + // Increase height with probability 1 in kBranching int height = 1; - while (height < kMaxHeight_ && ((rnd_.Next() % kBranching_) == 0)) { + while (height < kMaxHeight_ && rnd->Next() < kScaledInverseBranching_) { height++; } assert(height > 0); @@ -391,14 +391,16 @@ SkipList::SkipList(const Comparator cmp, Allocator* allocator, int32_t branching_factor) : kMaxHeight_(max_height), kBranching_(branching_factor), + kScaledInverseBranching_((Random::kMaxNext + 1) / kBranching_), compare_(cmp), allocator_(allocator), head_(NewNode(0 /* any key will do */, max_height)), max_height_(1), - prev_height_(1), - rnd_(0xdeadbeef) { - assert(kMaxHeight_ > 0); - assert(kBranching_ > 0); + prev_height_(1) { + assert(max_height > 0 && kMaxHeight_ == static_cast(max_height)); + assert(branching_factor > 0 && + kBranching_ == static_cast(branching_factor)); + assert(kScaledInverseBranching_ > 0); // Allocate the prev_ Node* array, directly from the passed-in allocator. // prev_ does not need to be freed, as its life cycle is tied up with // the allocator as a whole. diff --git a/src.mk b/src.mk index 802fc6138..f357cdd93 100644 --- a/src.mk +++ b/src.mk @@ -145,6 +145,7 @@ LIB_SOURCES = \ util/options_sanity_check.cc \ util/perf_context.cc \ util/perf_level.cc \ + util/random.cc \ util/rate_limiter.cc \ util/skiplistrep.cc \ util/slice.cc \ diff --git a/util/random.cc b/util/random.cc new file mode 100644 index 000000000..37e88e19f --- /dev/null +++ b/util/random.cc @@ -0,0 +1,39 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// + +#include "util/random.h" + +#include +#include +#include + +#include "port/likely.h" +#include "util/thread_local.h" + +#if ROCKSDB_SUPPORT_THREAD_LOCAL +#define STORAGE_DECL static __thread +#else +#define STORAGE_DECL static +#endif + +namespace rocksdb { + +Random* Random::GetTLSInstance() { + STORAGE_DECL Random* tls_instance; + STORAGE_DECL std::aligned_storage::type tls_instance_bytes; + + auto rv = tls_instance; + if (UNLIKELY(rv == nullptr)) { + const pthread_t self = pthread_self(); + uint32_t seed = 0; + memcpy(&seed, &self, sizeof(seed)); + rv = new (&tls_instance_bytes) Random(seed); + tls_instance = rv; + } + return rv; +} + +} // namespace rocksdb diff --git a/util/random.h b/util/random.h index e5b331500..f259db07d 100644 --- a/util/random.h +++ b/util/random.h @@ -18,12 +18,22 @@ namespace rocksdb { // package. class Random { private: + static constexpr uint32_t M = 2147483647L; // 2^31-1 + static constexpr uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 + uint32_t seed_; + + static uint32_t GoodSeed(uint32_t s) { return (s & M) != 0 ? (s & M) : 1; } + public: - explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { } + // This is the largest value that can be returned from Next() + static constexpr uint32_t kMaxNext = M; + + explicit Random(uint32_t s) : seed_(GoodSeed(s)) {} + + void Reset(uint32_t s) { seed_ = GoodSeed(s); } + uint32_t Next() { - static const uint32_t M = 2147483647L; // 2^31-1 - static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 // We are computing // seed_ = (seed_ * A) % M, where M = 2^31-1 // @@ -42,6 +52,7 @@ class Random { } return seed_; } + // Returns a uniformly distributed value in the range [0..n-1] // REQUIRES: n > 0 uint32_t Uniform(int n) { return Next() % n; } @@ -56,6 +67,10 @@ class Random { uint32_t Skewed(int max_log) { return Uniform(1 << Uniform(max_log + 1)); } + + // Returns a Random instance for use by the current thread without + // additional locking + static Random* GetTLSInstance(); }; // A simple 64bit random number generator based on std::mt19937_64