rocksdb/cache/sharded_cache.h

104 lines
4.1 KiB
C
Raw Normal View History

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <atomic>
#include <string>
#include "port/port.h"
#include "rocksdb/cache.h"
#include "util/hash.h"
namespace rocksdb {
// Single cache shard interface.
class CacheShard {
public:
CacheShard() = default;
virtual ~CacheShard() = default;
virtual Status Insert(const Slice& key, uint32_t hash, void* value,
size_t charge,
void (*deleter)(const Slice& key, void* value),
Cache::Handle** handle, Cache::Priority priority) = 0;
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) = 0;
virtual bool Ref(Cache::Handle* handle) = 0;
virtual bool Release(Cache::Handle* handle, bool force_erase = false) = 0;
virtual void Erase(const Slice& key, uint32_t hash) = 0;
virtual void SetCapacity(size_t capacity) = 0;
virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
virtual size_t GetUsage() const = 0;
virtual size_t GetPinnedUsage() const = 0;
virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) = 0;
virtual void EraseUnRefEntries() = 0;
virtual std::string GetPrintableOptions() const { return ""; }
};
// Generic cache interface which shards cache by hash of keys. 2^num_shard_bits
// shards will be created, with capacity split evenly to each of the shards.
// Keys are sharded by the highest num_shard_bits bits of hash value.
class ShardedCache : public Cache {
public:
ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr);
virtual ~ShardedCache() = default;
virtual const char* Name() const override = 0;
virtual CacheShard* GetShard(int shard) = 0;
virtual const CacheShard* GetShard(int shard) const = 0;
virtual void* Value(Handle* handle) override = 0;
virtual size_t GetCharge(Handle* handle) const = 0;
virtual uint32_t GetHash(Handle* handle) const = 0;
virtual void DisownData() override = 0;
virtual void SetCapacity(size_t capacity) override;
virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;
virtual Status Insert(const Slice& key, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value),
Handle** handle, Priority priority) override;
virtual Handle* Lookup(const Slice& key, Statistics* stats) override;
virtual bool Ref(Handle* handle) override;
virtual bool Release(Handle* handle, bool force_erase = false) override;
virtual void Erase(const Slice& key) override;
virtual uint64_t NewId() override;
virtual size_t GetCapacity() const override;
virtual bool HasStrictCapacityLimit() const override;
virtual size_t GetUsage() const override;
virtual size_t GetUsage(Handle* handle) const override;
virtual size_t GetPinnedUsage() const override;
virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) override;
virtual void EraseUnRefEntries() override;
virtual std::string GetPrintableOptions() const override;
int GetNumShardBits() const { return num_shard_bits_; }
private:
static inline uint32_t HashSlice(const Slice& s) {
Consolidate hash function used for non-persistent data in a new function (#5155) Summary: Create new function NPHash64() and GetSliceNPHash64(), which are currently implemented using murmurhash. Replace the current direct call of murmurhash() to use the new functions if the hash results are not used in on-disk format. This will make it easier to try out or switch to alternative functions in the uses where data format compatibility doesn't need to be considered. This part shouldn't have any performance impact. Also, the sharded cache hash function is changed to the new format, because it falls into this categoery. It doesn't show visible performance impact in db_bench results. CPU showed by perf is increased from about 0.2% to 0.4% in an extreme benchmark setting (4KB blocks, no-compression, everything cached in block cache). We've known that the current hash function used, our own Hash() has serious hash quality problem. It can generate a lots of conflicts with similar input. In this use case, it means extra lock contention for reads from the same file. This slight CPU regression is worthy to me to counter the potential bad performance with hot keys. And hopefully this will get further improved in the future with a better hash function. cache_test's condition is relaxed a little bit to. The new hash is slightly more skewed in this use case, but I manually checked the data and see the hash results are still in a reasonable range. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5155 Differential Revision: D14834821 Pulled By: siying fbshipit-source-id: ec9a2c0a2f8ae4b54d08b13a5c2e9cc97aa80cb5
2019-04-08 13:24:29 -07:00
return static_cast<uint32_t>(GetSliceNPHash64(s));
}
uint32_t Shard(uint32_t hash) {
// Note, hash >> 32 yields hash in gcc, not the zero we expect!
return (num_shard_bits_ > 0) ? (hash >> (32 - num_shard_bits_)) : 0;
}
int num_shard_bits_;
mutable port::Mutex capacity_mutex_;
size_t capacity_;
bool strict_capacity_limit_;
std::atomic<uint64_t> last_id_;
};
extern int GetDefaultCacheShardBits(size_t capacity);
} // namespace rocksdb