Cache simulator: Add a ghost cache for admission control and a hybrid row-block cache. (#5534)

Summary:
This PR adds a ghost cache for admission control. Specifically, it admits an entry on its second access.
It also adds a hybrid row-block cache that caches the referenced key-value pairs of a Get/MultiGet request instead of its blocks.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5534

Test Plan: make clean && COMPILE_WITH_ASAN=1 make check -j32

Differential Revision: D16101124

Pulled By: HaoyuHuang

fbshipit-source-id: b99edda6418a888e94eb40f71ece45d375e234b1
This commit is contained in:
haoyuhuang 2019-07-11 12:40:08 -07:00 committed by Facebook Github Bot
parent 82d8ca8ade
commit 1a59b6e2a9
11 changed files with 687 additions and 54 deletions

View File

@ -1006,6 +1006,7 @@ if(WITH_TESTS)
utilities/options/options_util_test.cc utilities/options/options_util_test.cc
utilities/persistent_cache/hash_table_test.cc utilities/persistent_cache/hash_table_test.cc
utilities/persistent_cache/persistent_cache_test.cc utilities/persistent_cache/persistent_cache_test.cc
utilities/simulator_cache/cache_simulator_test.cc
utilities/simulator_cache/sim_cache_test.cc utilities/simulator_cache/sim_cache_test.cc
utilities/table_properties_collectors/compact_on_deletion_collector_test.cc utilities/table_properties_collectors/compact_on_deletion_collector_test.cc
utilities/transactions/optimistic_transaction_test.cc utilities/transactions/optimistic_transaction_test.cc

View File

@ -510,6 +510,7 @@ TESTS = \
cassandra_serialize_test \ cassandra_serialize_test \
ttl_test \ ttl_test \
backupable_db_test \ backupable_db_test \
cache_simulator_test \
sim_cache_test \ sim_cache_test \
version_edit_test \ version_edit_test \
version_set_test \ version_set_test \
@ -1321,6 +1322,9 @@ backupable_db_test: utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TE
checkpoint_test: utilities/checkpoint/checkpoint_test.o $(LIBOBJECTS) $(TESTHARNESS) checkpoint_test: utilities/checkpoint/checkpoint_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK) $(AM_LINK)
cache_simulator_test: utilities/simulator_cache/cache_simulator_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
sim_cache_test: utilities/simulator_cache/sim_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) sim_cache_test: utilities/simulator_cache/sim_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK) $(AM_LINK)

View File

@ -423,6 +423,11 @@ ROCKS_TESTS = [
"cache/cache_test.cc", "cache/cache_test.cc",
"serial", "serial",
], ],
[
"cache_simulator_test",
"utilities/simulator_cache/cache_simulator_test.cc",
"serial",
],
[ [
"cassandra_format_test", "cassandra_format_test",
"utilities/cassandra/cassandra_format_test.cc", "utilities/cassandra/cassandra_format_test.cc",

1
src.mk
View File

@ -405,6 +405,7 @@ MAIN_SOURCES = \
utilities/object_registry_test.cc \ utilities/object_registry_test.cc \
utilities/option_change_migration/option_change_migration_test.cc \ utilities/option_change_migration/option_change_migration_test.cc \
utilities/options/options_util_test.cc \ utilities/options/options_util_test.cc \
utilities/simulator_cache/cache_simulator_test.cc \
utilities/simulator_cache/sim_cache_test.cc \ utilities/simulator_cache/sim_cache_test.cc \
utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \ utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \
utilities/transactions/optimistic_transaction_test.cc \ utilities/transactions/optimistic_transaction_test.cc \

View File

@ -23,9 +23,12 @@ DEFINE_string(
block_cache_sim_config_path, "", block_cache_sim_config_path, "",
"The config file path. One cache configuration per line. The format of a " "The config file path. One cache configuration per line. The format of a "
"cache configuration is " "cache configuration is "
"cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. " "cache_name,num_shard_bits,ghost_capacity,cache_capacity_1,...,cache_"
"cache_name is lru or lru_priority. cache_capacity can be xK, xM or xG " "capacity_N. Supported cache names are lru, lru_priority, lru_hybrid, and "
"where x is a positive number."); "lru_hybrid_no_insert_on_row_miss. User may also add a prefix 'ghost_' to "
"a cache_name to add a ghost cache in front of the real cache. "
"ghost_capacity and cache_capacity can be xK, xM or xG where x is a "
"positive number.");
DEFINE_int32(block_cache_trace_downsample_ratio, 1, DEFINE_int32(block_cache_trace_downsample_ratio, 1,
"The trace collected accesses on one in every " "The trace collected accesses on one in every "
"block_cache_trace_downsample_ratio blocks. We scale " "block_cache_trace_downsample_ratio blocks. We scale "
@ -104,6 +107,10 @@ const std::string kGroupbyAll = "all";
const std::set<std::string> kGroupbyLabels{ const std::set<std::string> kGroupbyLabels{
kGroupbyBlock, kGroupbyColumnFamily, kGroupbySSTFile, kGroupbyLevel, kGroupbyBlock, kGroupbyColumnFamily, kGroupbySSTFile, kGroupbyLevel,
kGroupbyBlockType, kGroupbyCaller, kGroupbyAll}; kGroupbyBlockType, kGroupbyCaller, kGroupbyAll};
const std::string kSupportedCacheNames =
" lru ghost_lru lru_priority ghost_lru_priority lru_hybrid "
"ghost_lru_hybrid lru_hybrid_no_insert_on_row_miss "
"ghost_lru_hybrid_no_insert_on_row_miss ";
std::string block_type_to_string(TraceType type) { std::string block_type_to_string(TraceType type) {
switch (type) { switch (type) {
@ -194,7 +201,8 @@ void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const {
} }
// Write header. // Write header.
const std::string header = const std::string header =
"cache_name,num_shard_bits,capacity,miss_ratio,total_accesses"; "cache_name,num_shard_bits,ghost_capacity,capacity,miss_ratio,total_"
"accesses";
out << header << std::endl; out << header << std::endl;
for (auto const& config_caches : cache_simulator_->sim_caches()) { for (auto const& config_caches : cache_simulator_->sim_caches()) {
const CacheConfiguration& config = config_caches.first; const CacheConfiguration& config = config_caches.first;
@ -205,6 +213,8 @@ void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const {
out << ","; out << ",";
out << config.num_shard_bits; out << config.num_shard_bits;
out << ","; out << ",";
out << config.ghost_cache_capacity;
out << ",";
out << config.cache_capacities[i]; out << config.cache_capacities[i];
out << ","; out << ",";
out << std::fixed << std::setprecision(4) << miss_ratio; out << std::fixed << std::setprecision(4) << miss_ratio;
@ -993,18 +1003,21 @@ std::vector<CacheConfiguration> parse_cache_config_file(
config_strs.push_back(substr); config_strs.push_back(substr);
} }
// Sanity checks. // Sanity checks.
if (config_strs.size() < 3) { if (config_strs.size() < 4) {
fprintf(stderr, "Invalid cache simulator configuration %s\n", fprintf(stderr, "Invalid cache simulator configuration %s\n",
line.c_str()); line.c_str());
exit(1); exit(1);
} }
if (config_strs[0] != "lru") { if (kSupportedCacheNames.find(" " + config_strs[0] + " ") ==
fprintf(stderr, "We only support LRU cache %s\n", line.c_str()); std::string::npos) {
fprintf(stderr, "Invalid cache name %s. Supported cache names are %s\n",
line.c_str(), kSupportedCacheNames.c_str());
exit(1); exit(1);
} }
cache_config.cache_name = config_strs[0]; cache_config.cache_name = config_strs[0];
cache_config.num_shard_bits = ParseUint32(config_strs[1]); cache_config.num_shard_bits = ParseUint32(config_strs[1]);
for (uint32_t i = 2; i < config_strs.size(); i++) { cache_config.ghost_cache_capacity = ParseUint64(config_strs[2]);
for (uint32_t i = 3; i < config_strs.size(); i++) {
uint64_t capacity = ParseUint64(config_strs[i]); uint64_t capacity = ParseUint64(config_strs[i]);
if (capacity == 0) { if (capacity == 0) {
fprintf(stderr, "Invalid cache capacity %s, %s\n", fprintf(stderr, "Invalid cache capacity %s, %s\n",

View File

@ -205,7 +205,7 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
} }
{ {
// Generate a cache sim config. // Generate a cache sim config.
std::string config = "lru,1,1K,1M,1G"; std::string config = "lru,1,0,1K,1M,1G";
std::ofstream out(block_cache_sim_config_path_); std::ofstream out(block_cache_sim_config_path_);
ASSERT_TRUE(out.is_open()); ASSERT_TRUE(out.is_open());
out << config << std::endl; out << config << std::endl;
@ -230,14 +230,15 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
getline(ss, substr, ','); getline(ss, substr, ',');
result_strs.push_back(substr); result_strs.push_back(substr);
} }
ASSERT_EQ(5, result_strs.size()); ASSERT_EQ(6, result_strs.size());
ASSERT_LT(config_index, expected_capacities.size()); ASSERT_LT(config_index, expected_capacities.size());
ASSERT_EQ("lru", result_strs[0]); // cache_name ASSERT_EQ("lru", result_strs[0]); // cache_name
ASSERT_EQ("1", result_strs[1]); // num_shard_bits ASSERT_EQ("1", result_strs[1]); // num_shard_bits
ASSERT_EQ("0", result_strs[2]); // ghost_cache_capacity
ASSERT_EQ(std::to_string(expected_capacities[config_index]), ASSERT_EQ(std::to_string(expected_capacities[config_index]),
result_strs[2]); // cache_capacity result_strs[3]); // cache_capacity
ASSERT_EQ("100.0000", result_strs[3]); // miss_ratio ASSERT_EQ("100.0000", result_strs[4]); // miss_ratio
ASSERT_EQ("50", result_strs[4]); // number of accesses. ASSERT_EQ("50", result_strs[5]); // number of accesses.
config_index++; config_index++;
} }
ASSERT_EQ(expected_capacities.size(), config_index); ASSERT_EQ(expected_capacities.size(), config_index);

View File

@ -45,6 +45,14 @@ bool BlockCacheTraceHelper::ShouldTraceGetId(TableReaderCaller caller) {
caller == TableReaderCaller::kUserMultiGet; caller == TableReaderCaller::kUserMultiGet;
} }
bool BlockCacheTraceHelper::IsUserAccess(TableReaderCaller caller) {
  // True when the access originates from a user-issued operation
  // (Get/MultiGet, iterator, approximate-size, verify-checksum), as opposed
  // to background work such as compaction or prefetching.
  return caller == TableReaderCaller::kUserGet ||
         caller == TableReaderCaller::kUserMultiGet ||
         caller == TableReaderCaller::kUserIterator ||
         caller == TableReaderCaller::kUserApproximateSize ||
         caller == TableReaderCaller::kUserVerifyChecksum;
}
BlockCacheTraceWriter::BlockCacheTraceWriter( BlockCacheTraceWriter::BlockCacheTraceWriter(
Env* env, const TraceOptions& trace_options, Env* env, const TraceOptions& trace_options,
std::unique_ptr<TraceWriter>&& trace_writer) std::unique_ptr<TraceWriter>&& trace_writer)

View File

@ -23,6 +23,7 @@ class BlockCacheTraceHelper {
static bool ShouldTraceReferencedKey(TraceType block_type, static bool ShouldTraceReferencedKey(TraceType block_type,
TableReaderCaller caller); TableReaderCaller caller);
static bool ShouldTraceGetId(TableReaderCaller caller); static bool ShouldTraceGetId(TableReaderCaller caller);
static bool IsUserAccess(TableReaderCaller caller);
static const std::string kUnknownColumnFamilyName; static const std::string kUnknownColumnFamilyName;
static const uint64_t kReservedGetId; static const uint64_t kReservedGetId;

View File

@ -4,42 +4,177 @@
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
#include "utilities/simulator_cache/cache_simulator.h" #include "utilities/simulator_cache/cache_simulator.h"
#include "db/dbformat.h"
namespace rocksdb { namespace rocksdb {
CacheSimulator::CacheSimulator(std::shared_ptr<SimCache> sim_cache)
namespace {
const std::string kGhostCachePrefix = "ghost_";
}
GhostCache::GhostCache(std::shared_ptr<Cache> sim_cache)
: sim_cache_(sim_cache) {} : sim_cache_(sim_cache) {}
bool GhostCache::Admit(const Slice& lookup_key) {
  // Probe the ghost cache: a hit means this key has been seen before, so the
  // real cache should admit it on this (second or later) access.
  auto handle = sim_cache_->Lookup(lookup_key);
  if (handle != nullptr) {
    sim_cache_->Release(handle);
    return true;
  }
  // First access: remember the key (charged by key size only; no value is
  // stored) and reject admission.
  sim_cache_->Insert(lookup_key, /*value=*/nullptr, lookup_key.size(),
                     /*deleter=*/nullptr, /*handle=*/nullptr);
  return false;
}
// Constructs a cache simulator. 'ghost_cache' may be nullptr (see the tests),
// in which case every insertion is admitted unconditionally.
CacheSimulator::CacheSimulator(std::unique_ptr<GhostCache>&& ghost_cache,
                               std::shared_ptr<Cache> sim_cache)
    : ghost_cache_(std::move(ghost_cache)), sim_cache_(sim_cache) {}
void CacheSimulator::Access(const BlockCacheTraceRecord& access) { void CacheSimulator::Access(const BlockCacheTraceRecord& access) {
bool admit = true;
const bool is_user_access =
BlockCacheTraceHelper::IsUserAccess(access.caller);
bool is_cache_miss = true;
if (ghost_cache_ && access.no_insert == Boolean::kFalse) {
admit = ghost_cache_->Admit(access.block_key);
}
auto handle = sim_cache_->Lookup(access.block_key); auto handle = sim_cache_->Lookup(access.block_key);
if (handle == nullptr && !access.no_insert) { if (handle != nullptr) {
sim_cache_->Release(handle);
is_cache_miss = false;
} else {
if (access.no_insert == Boolean::kFalse && admit) {
sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size, sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size,
/*deleter=*/nullptr, /*handle=*/nullptr); /*deleter=*/nullptr, /*handle=*/nullptr);
} }
} }
UpdateMetrics(is_user_access, is_cache_miss);
}
void PrioritizedCacheSimulator::Access(const BlockCacheTraceRecord& access) { void CacheSimulator::UpdateMetrics(bool is_user_access, bool is_cache_miss) {
auto handle = sim_cache_->Lookup(access.block_key); num_accesses_ += 1;
if (handle == nullptr && !access.no_insert) { if (is_cache_miss) {
Cache::Priority priority = Cache::Priority::LOW; num_misses_ += 1;
}
if (is_user_access) {
user_accesses_ += 1;
if (is_cache_miss) {
user_misses_ += 1;
}
}
}
Cache::Priority PrioritizedCacheSimulator::ComputeBlockPriority(
const BlockCacheTraceRecord& access) const {
if (access.block_type == TraceType::kBlockTraceFilterBlock || if (access.block_type == TraceType::kBlockTraceFilterBlock ||
access.block_type == TraceType::kBlockTraceIndexBlock || access.block_type == TraceType::kBlockTraceIndexBlock ||
access.block_type == TraceType::kBlockTraceUncompressionDictBlock) { access.block_type == TraceType::kBlockTraceUncompressionDictBlock) {
priority = Cache::Priority::HIGH; return Cache::Priority::HIGH;
} }
sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size, return Cache::Priority::LOW;
}
void PrioritizedCacheSimulator::AccessKVPair(
const Slice& key, uint64_t value_size, Cache::Priority priority,
bool no_insert, bool is_user_access, bool* is_cache_miss, bool* admitted,
bool update_metrics) {
assert(is_cache_miss);
assert(admitted);
*is_cache_miss = true;
*admitted = true;
if (ghost_cache_ && !no_insert) {
*admitted = ghost_cache_->Admit(key);
}
auto handle = sim_cache_->Lookup(key);
if (handle != nullptr) {
sim_cache_->Release(handle);
*is_cache_miss = false;
} else if (!no_insert && *admitted && value_size > 0) {
sim_cache_->Insert(key, /*value=*/nullptr, value_size,
/*deleter=*/nullptr, /*handle=*/nullptr, priority); /*deleter=*/nullptr, /*handle=*/nullptr, priority);
} }
if (update_metrics) {
UpdateMetrics(is_user_access, *is_cache_miss);
}
} }
double CacheSimulator::miss_ratio() { void PrioritizedCacheSimulator::Access(const BlockCacheTraceRecord& access) {
uint64_t hits = sim_cache_->get_hit_counter(); bool is_cache_miss = true;
uint64_t misses = sim_cache_->get_miss_counter(); bool admitted = true;
uint64_t accesses = hits + misses; AccessKVPair(access.block_key, access.block_size,
return static_cast<double>(misses * 100.0 / accesses); ComputeBlockPriority(access), access.no_insert,
BlockCacheTraceHelper::IsUserAccess(access.caller),
&is_cache_miss, &admitted, /*update_metrics=*/true);
} }
uint64_t CacheSimulator::total_accesses() { std::string HybridRowBlockCacheSimulator::ComputeRowKey(
return sim_cache_->get_hit_counter() + sim_cache_->get_miss_counter(); const BlockCacheTraceRecord& access) {
assert(access.get_id != BlockCacheTraceHelper::kReservedGetId);
Slice key;
if (access.referenced_key_exist_in_block == Boolean::kTrue) {
key = ExtractUserKey(access.referenced_key);
} else {
key = access.referenced_key;
}
return std::to_string(access.sst_fd_number) + "_" + key.ToString();
}
// Simulates one trace access for the hybrid row-block policy.
// For a Get/MultiGet access (get_id is set), the referenced key-value pair is
// looked up first; a row hit short-circuits all of that get's block accesses.
// Any other access falls through to the plain prioritized block simulation.
void HybridRowBlockCacheSimulator::Access(const BlockCacheTraceRecord& access) {
  bool is_cache_miss = true;
  bool admitted = true;
  if (access.get_id != BlockCacheTraceHelper::kReservedGetId) {
    // This is a Get/MultiGet request.
    const std::string& row_key = ComputeRowKey(access);
    // One lookup into the per-get state instead of repeating the outer
    // map subscript below.
    auto& row_status = getid_getkeys_map_[access.get_id];
    if (row_status.find(row_key) == row_status.end()) {
      // This is the first time that this key is accessed. Look up the
      // key-value pair first. Do not update the miss/accesses metrics here
      // since they will be updated later.
      AccessKVPair(row_key, access.referenced_data_size, Cache::Priority::HIGH,
                   /*no_insert=*/false,
                   /*is_user_access=*/true, &is_cache_miss, &admitted,
                   /*update_metrics=*/false);
      InsertResult result = InsertResult::NO_INSERT;
      if (admitted && access.referenced_data_size > 0) {
        result = InsertResult::INSERTED;
      } else if (admitted) {
        // Admitted but the row size is not known yet (e.g., the first access
        // for this key is an index/filter block); insert once known.
        result = InsertResult::ADMITTED;
      }
      row_status[row_key] = std::make_pair(is_cache_miss, result);
    }
    std::pair<bool, InsertResult> miss_inserted = row_status[row_key];
    if (!miss_inserted.first) {
      // This is a cache hit. Skip future accesses to its index/filter/data
      // blocks. These block lookups are unnecessary if we observe a hit for
      // the referenced key-value pair already. Thus, we treat these lookups
      // as hits. This is also to ensure the total number of accesses is the
      // same when comparing to other policies.
      UpdateMetrics(/*is_user_access=*/true, /*is_cache_miss=*/false);
      return;
    }
    // The key-value pair observes a cache miss. We need to access its
    // index/filter/data blocks.
    // NOTE: pass the block size here; the original code passed
    // access.block_type (an enum), silently charging a bogus tiny size.
    AccessKVPair(
        access.block_key, access.block_size, ComputeBlockPriority(access),
        /*no_insert=*/!insert_blocks_upon_row_kvpair_miss_ || access.no_insert,
        /*is_user_access=*/true, &is_cache_miss, &admitted,
        /*update_metrics=*/true);
    if (access.referenced_data_size > 0 &&
        miss_inserted.second == InsertResult::ADMITTED) {
      // The row was admitted earlier without a known size; now that the
      // referenced data size is available, insert the kv-pair.
      sim_cache_->Insert(
          row_key, /*value=*/nullptr, access.referenced_data_size,
          /*deleter=*/nullptr, /*handle=*/nullptr, Cache::Priority::HIGH);
      row_status[row_key] = std::make_pair(true, InsertResult::INSERTED);
    }
    return;
  }
  // Not part of a Get/MultiGet: a regular prioritized block access.
  AccessKVPair(access.block_key, access.block_size,
               ComputeBlockPriority(access), access.no_insert,
               BlockCacheTraceHelper::IsUserAccess(access.caller),
               &is_cache_miss, &admitted, /*update_metrics=*/true);
}
BlockCacheTraceSimulator::BlockCacheTraceSimulator( BlockCacheTraceSimulator::BlockCacheTraceSimulator(
@ -56,18 +191,41 @@ Status BlockCacheTraceSimulator::InitializeCaches() {
// 1/'downsample_ratio' blocks. // 1/'downsample_ratio' blocks.
uint64_t simulate_cache_capacity = cache_capacity / downsample_ratio_; uint64_t simulate_cache_capacity = cache_capacity / downsample_ratio_;
std::shared_ptr<CacheSimulator> sim_cache; std::shared_ptr<CacheSimulator> sim_cache;
if (config.cache_name == "lru") { std::unique_ptr<GhostCache> ghost_cache;
sim_cache = std::make_shared<CacheSimulator>(NewSimCache( std::string cache_name = config.cache_name;
if (cache_name.find(kGhostCachePrefix) != std::string::npos) {
ghost_cache.reset(new GhostCache(
NewLRUCache(config.ghost_cache_capacity, /*num_shard_bits=*/1,
/*strict_capacity_limit=*/false,
/*high_pri_pool_ratio=*/0)));
cache_name = cache_name.substr(kGhostCachePrefix.size());
}
if (cache_name == "lru") {
sim_cache = std::make_shared<CacheSimulator>(
std::move(ghost_cache),
NewLRUCache(simulate_cache_capacity, config.num_shard_bits, NewLRUCache(simulate_cache_capacity, config.num_shard_bits,
/*strict_capacity_limit=*/false, /*strict_capacity_limit=*/false,
/*high_pri_pool_ratio=*/0), /*high_pri_pool_ratio=*/0));
/*real_cache=*/nullptr, config.num_shard_bits)); } else if (cache_name == "lru_priority") {
} else if (config.cache_name == "lru_priority") { sim_cache = std::make_shared<PrioritizedCacheSimulator>(
sim_cache = std::make_shared<PrioritizedCacheSimulator>(NewSimCache( std::move(ghost_cache),
NewLRUCache(simulate_cache_capacity, config.num_shard_bits,
/*strict_capacity_limit=*/false,
/*high_pri_pool_ratio=*/0.5));
} else if (cache_name == "lru_hybrid") {
sim_cache = std::make_shared<HybridRowBlockCacheSimulator>(
std::move(ghost_cache),
NewLRUCache(simulate_cache_capacity, config.num_shard_bits, NewLRUCache(simulate_cache_capacity, config.num_shard_bits,
/*strict_capacity_limit=*/false, /*strict_capacity_limit=*/false,
/*high_pri_pool_ratio=*/0.5), /*high_pri_pool_ratio=*/0.5),
/*real_cache=*/nullptr, config.num_shard_bits)); /*insert_blocks_upon_row_kvpair_miss=*/true);
} else if (cache_name == "lru_hybrid_no_insert_on_row_miss") {
sim_cache = std::make_shared<HybridRowBlockCacheSimulator>(
std::move(ghost_cache),
NewLRUCache(simulate_cache_capacity, config.num_shard_bits,
/*strict_capacity_limit=*/false,
/*high_pri_pool_ratio=*/0.5),
/*insert_blocks_upon_row_kvpair_miss=*/false);
} else { } else {
// Not supported. // Not supported.
return Status::InvalidArgument("Unknown cache name " + return Status::InvalidArgument("Unknown cache name " +

View File

@ -5,7 +5,6 @@
#pragma once #pragma once
#include "rocksdb/utilities/sim_cache.h"
#include "trace_replay/block_cache_tracer.h" #include "trace_replay/block_cache_tracer.h"
namespace rocksdb { namespace rocksdb {
@ -14,22 +13,46 @@ namespace rocksdb {
struct CacheConfiguration { struct CacheConfiguration {
std::string cache_name; // LRU. std::string cache_name; // LRU.
uint32_t num_shard_bits; uint32_t num_shard_bits;
uint64_t ghost_cache_capacity; // ghost cache capacity in bytes.
std::vector<uint64_t> std::vector<uint64_t>
cache_capacities; // simulate cache capacities in bytes. cache_capacities; // simulate cache capacities in bytes.
bool operator=(const CacheConfiguration& o) const { bool operator==(const CacheConfiguration& o) const {
return cache_name == o.cache_name && num_shard_bits == o.num_shard_bits; return cache_name == o.cache_name && num_shard_bits == o.num_shard_bits &&
ghost_cache_capacity == o.ghost_cache_capacity;
} }
bool operator<(const CacheConfiguration& o) const { bool operator<(const CacheConfiguration& o) const {
return cache_name < o.cache_name || return cache_name < o.cache_name ||
(cache_name == o.cache_name && num_shard_bits < o.num_shard_bits); (cache_name == o.cache_name && num_shard_bits < o.num_shard_bits) ||
(cache_name == o.cache_name && num_shard_bits == o.num_shard_bits &&
ghost_cache_capacity < o.ghost_cache_capacity);
} }
}; };
// A ghost cache admits an entry on its second access.
class GhostCache {
 public:
  explicit GhostCache(std::shared_ptr<Cache> sim_cache);
  ~GhostCache() = default;
  // No copy and move.
  GhostCache(const GhostCache&) = delete;
  GhostCache& operator=(const GhostCache&) = delete;
  GhostCache(GhostCache&&) = delete;
  GhostCache& operator=(GhostCache&&) = delete;
  // Returns true if the lookup_key is in the ghost cache.
  // Returns false otherwise; as a side effect of the miss, the key is
  // inserted into the ghost cache so that its next access is admitted.
  bool Admit(const Slice& lookup_key);

 private:
  // Backing cache used only to remember which keys have been seen; entries
  // carry no value and are charged by key size.
  std::shared_ptr<Cache> sim_cache_;
};
// A cache simulator that runs against a block cache trace. // A cache simulator that runs against a block cache trace.
class CacheSimulator { class CacheSimulator {
public: public:
CacheSimulator(std::shared_ptr<SimCache> sim_cache); CacheSimulator(std::unique_ptr<GhostCache>&& ghost_cache,
std::shared_ptr<Cache> sim_cache);
virtual ~CacheSimulator() = default; virtual ~CacheSimulator() = default;
// No copy and move. // No copy and move.
CacheSimulator(const CacheSimulator&) = delete; CacheSimulator(const CacheSimulator&) = delete;
@ -38,12 +61,37 @@ class CacheSimulator {
CacheSimulator& operator=(CacheSimulator&&) = delete; CacheSimulator& operator=(CacheSimulator&&) = delete;
virtual void Access(const BlockCacheTraceRecord& access); virtual void Access(const BlockCacheTraceRecord& access);
void reset_counter() { sim_cache_->reset_counter(); } void reset_counter() {
double miss_ratio(); num_misses_ = 0;
uint64_t total_accesses(); num_accesses_ = 0;
user_accesses_ = 0;
user_misses_ = 0;
}
double miss_ratio() const {
if (num_accesses_ == 0) {
return -1;
}
return static_cast<double>(num_misses_ * 100.0 / num_accesses_);
}
uint64_t total_accesses() const { return num_accesses_; }
double user_miss_ratio() const {
if (user_accesses_ == 0) {
return -1;
}
return static_cast<double>(user_misses_ * 100.0 / user_accesses_);
}
uint64_t user_accesses() const { return user_accesses_; }
protected: protected:
std::shared_ptr<SimCache> sim_cache_; void UpdateMetrics(bool is_user_access, bool is_cache_miss);
std::unique_ptr<GhostCache> ghost_cache_;
std::shared_ptr<Cache> sim_cache_;
uint64_t num_accesses_ = 0;
uint64_t num_misses_ = 0;
uint64_t user_accesses_ = 0;
uint64_t user_misses_ = 0;
}; };
// A prioritized cache simulator that runs against a block cache trace. // A prioritized cache simulator that runs against a block cache trace.
@ -51,9 +99,65 @@ class CacheSimulator {
// priority in the cache. // priority in the cache.
class PrioritizedCacheSimulator : public CacheSimulator { class PrioritizedCacheSimulator : public CacheSimulator {
public: public:
PrioritizedCacheSimulator(std::shared_ptr<SimCache> sim_cache) PrioritizedCacheSimulator(std::unique_ptr<GhostCache>&& ghost_cache,
: CacheSimulator(sim_cache) {} std::shared_ptr<Cache> sim_cache)
: CacheSimulator(std::move(ghost_cache), sim_cache) {}
void Access(const BlockCacheTraceRecord& access) override; void Access(const BlockCacheTraceRecord& access) override;
protected:
// Access the key-value pair and returns true upon a cache miss.
void AccessKVPair(const Slice& key, uint64_t value_size,
Cache::Priority priority, bool no_insert,
bool is_user_access, bool* is_cache_miss, bool* admitted,
bool update_metrics);
Cache::Priority ComputeBlockPriority(
const BlockCacheTraceRecord& access) const;
};
// A hybrid row and block cache simulator. It looks up/inserts key-value pairs
// referenced by Get/MultiGet requests, and not their accessed index/filter/data
// blocks.
//
// Upon a Get/MultiGet request, it looks up the referenced key first.
// If it observes a cache hit, future block accesses on this key-value pair are
// skipped since the request is served already. Otherwise, it continues to look
// up/insert its index/filter/data blocks. It also inserts the referenced
// key-value pair in the cache for future lookups.
class HybridRowBlockCacheSimulator : public PrioritizedCacheSimulator {
public:
HybridRowBlockCacheSimulator(std::unique_ptr<GhostCache>&& ghost_cache,
std::shared_ptr<Cache> sim_cache,
bool insert_blocks_upon_row_kvpair_miss)
: PrioritizedCacheSimulator(std::move(ghost_cache), sim_cache),
insert_blocks_upon_row_kvpair_miss_(
insert_blocks_upon_row_kvpair_miss) {}
void Access(const BlockCacheTraceRecord& access) override;
private:
// Row key is a concatenation of the access's fd_number and the referenced
// user key.
// TODO(haoyu): the row key should contain sequence number.
std::string ComputeRowKey(const BlockCacheTraceRecord& access);
enum InsertResult : char {
INSERTED,
ADMITTED,
NO_INSERT,
};
// A map from get_id to a map of row keys. For each row key, it stores a
// boolean and an enum. The first bool is true when we observe a miss upon the
// first time we encounter the row key. The second arg is INSERTED when the
// kv-pair has been inserted into the cache, ADMITTED if it should be inserted
// but has not been, NO_INSERT if it should not be inserted.
//
// A kv-pair is in ADMITTED state when we encounter this kv-pair but do not
// know its size. This may happen if the first access on the referenced key is
// an index/filter block.
std::map<uint64_t, std::map<std::string, std::pair<bool, InsertResult>>>
getid_getkeys_map_;
bool insert_blocks_upon_row_kvpair_miss_;
}; };
// A block cache simulator that reports miss ratio curves given a set of cache // A block cache simulator that reports miss ratio curves given a set of cache

View File

@ -0,0 +1,337 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "utilities/simulator_cache/cache_simulator.h"
#include <cstdlib>
#include "rocksdb/env.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
namespace rocksdb {
namespace {
// Prefixes used to build block keys and referenced (row) keys in the tests.
const std::string kBlockKeyPrefix = "test-block-";
const std::string kRefKeyPrefix = "test-get-";
// Identifiers for the simulated Get request and the SST files backing the
// Get and compaction accesses.
const uint64_t kGetId = 1;
const uint64_t kGetBlockId = 100;
const uint64_t kCompactionBlockId = 1000;
// 1 GB simulated cache; 1 MB ghost cache.
const uint64_t kCacheSize = 1024 * 1024 * 1024;
const uint64_t kGhostCacheSize = 1024 * 1024;
}  // namespace
// Test fixture providing canned block-cache trace records.
class CacheSimulatorTest : public testing::Test {
 public:
  // Shared defaults; not referenced by the tests visible here — TODO confirm
  // they are used by later tests in this file.
  const size_t kNumBlocks = 5;
  const size_t kValueSize = 1000;

  CacheSimulatorTest() { env_ = rocksdb::Env::Default(); }

  // Returns a record modeling a user Get on a data block of sst file
  // kGetBlockId. Note: the referenced key is derived from the constant kGetId
  // (not the 'getid' argument), so every generated Get record references the
  // same user key. The trailing 8 'c' bytes presumably mimic the 8-byte
  // internal-key footer that ExtractUserKey strips — TODO confirm.
  BlockCacheTraceRecord GenerateGetRecord(uint64_t getid) {
    BlockCacheTraceRecord record;
    record.block_type = TraceType::kBlockTraceDataBlock;
    record.block_size = 4096;
    record.block_key = kBlockKeyPrefix + std::to_string(kGetBlockId);
    record.access_timestamp = env_->NowMicros();
    record.cf_id = 0;
    record.cf_name = "test";
    record.caller = TableReaderCaller::kUserGet;
    record.level = 6;
    record.sst_fd_number = kGetBlockId;
    record.get_id = getid;
    record.is_cache_hit = Boolean::kFalse;
    record.no_insert = Boolean::kFalse;
    record.referenced_key =
        kRefKeyPrefix + std::to_string(kGetId) + std::string(8, 'c');
    record.referenced_key_exist_in_block = Boolean::kTrue;
    record.referenced_data_size = 100;
    record.num_keys_in_block = 300;
    return record;
  }

  // Returns a record modeling a compaction read; no_insert is true so
  // simulators must not populate the cache with it.
  BlockCacheTraceRecord GenerateCompactionRecord() {
    BlockCacheTraceRecord record;
    record.block_type = TraceType::kBlockTraceDataBlock;
    record.block_size = 4096;
    record.block_key = kBlockKeyPrefix + std::to_string(kCompactionBlockId);
    record.access_timestamp = env_->NowMicros();
    record.cf_id = 0;
    record.cf_name = "test";
    record.caller = TableReaderCaller::kCompaction;
    record.level = 6;
    record.sst_fd_number = kCompactionBlockId;
    record.is_cache_hit = Boolean::kFalse;
    record.no_insert = Boolean::kTrue;
    return record;
  }

  Env* env_;
};
TEST_F(CacheSimulatorTest, GhostCache) {
  // A ghost cache rejects a key on its first access and admits it afterwards.
  const std::string first_key = "test1";
  const std::string second_key = "test2";
  std::unique_ptr<GhostCache> ghost(
      new GhostCache(NewLRUCache(/*capacity=*/kGhostCacheSize,
                                 /*num_shard_bits=*/1,
                                 /*strict_capacity_limit=*/false,
                                 /*high_pri_pool_ratio=*/0)));
  // First access of a key is never admitted.
  EXPECT_FALSE(ghost->Admit(first_key));
  // Every subsequent access of the same key is admitted.
  EXPECT_TRUE(ghost->Admit(first_key));
  EXPECT_TRUE(ghost->Admit(first_key));
  // A different key starts over.
  EXPECT_FALSE(ghost->Admit(second_key));
  EXPECT_TRUE(ghost->Admit(second_key));
}
TEST_F(CacheSimulatorTest, CacheSimulator) {
  const BlockCacheTraceRecord& access = GenerateGetRecord(kGetId);
  const BlockCacheTraceRecord& compaction_access = GenerateCompactionRecord();
  std::shared_ptr<Cache> sim_cache =
      NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1,
                  /*strict_capacity_limit=*/false,
                  /*high_pri_pool_ratio=*/0);
  std::unique_ptr<CacheSimulator> cache_simulator(
      new CacheSimulator(nullptr, sim_cache));
  // First access misses and inserts the block; the second access hits.
  cache_simulator->Access(access);
  cache_simulator->Access(access);
  ASSERT_EQ(2, cache_simulator->total_accesses());
  ASSERT_EQ(50, cache_simulator->miss_ratio());
  // The Get record is a user access, so it also counts in the user metrics.
  ASSERT_EQ(2, cache_simulator->user_accesses());
  ASSERT_EQ(50, cache_simulator->user_miss_ratio());
  // Compaction accesses have no_insert=true: both miss, nothing is inserted,
  // and they do not count as user accesses.
  cache_simulator->Access(compaction_access);
  cache_simulator->Access(compaction_access);
  ASSERT_EQ(4, cache_simulator->total_accesses());
  ASSERT_EQ(75, cache_simulator->miss_ratio());
  ASSERT_EQ(2, cache_simulator->user_accesses());
  ASSERT_EQ(50, cache_simulator->user_miss_ratio());
  // reset_counter() clears the metrics; with zero accesses the miss ratio is
  // reported as -1.
  cache_simulator->reset_counter();
  ASSERT_EQ(0, cache_simulator->total_accesses());
  ASSERT_EQ(-1, cache_simulator->miss_ratio());
  // The Get block stays cached; the compaction block was never inserted.
  auto handle = sim_cache->Lookup(access.block_key);
  ASSERT_NE(nullptr, handle);
  sim_cache->Release(handle);
  handle = sim_cache->Lookup(compaction_access.block_key);
  ASSERT_EQ(nullptr, handle);
}
TEST_F(CacheSimulatorTest, GhostCacheSimulator) {
  // With a ghost cache in front, the first access is not admitted, so the
  // second access misses as well.
  const BlockCacheTraceRecord& get_access = GenerateGetRecord(kGetId);
  std::shared_ptr<Cache> real_cache =
      NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1,
                  /*strict_capacity_limit=*/false,
                  /*high_pri_pool_ratio=*/0);
  std::unique_ptr<GhostCache> ghost(new GhostCache(
      NewLRUCache(/*capacity=*/kGhostCacheSize, /*num_shard_bits=*/1,
                  /*strict_capacity_limit=*/false,
                  /*high_pri_pool_ratio=*/0)));
  std::unique_ptr<CacheSimulator> simulator(
      new CacheSimulator(std::move(ghost), real_cache));
  simulator->Access(get_access);
  simulator->Access(get_access);
  ASSERT_EQ(2, simulator->total_accesses());
  // Both accesses miss because the ghost cache blocked the first insertion.
  ASSERT_EQ(100, simulator->miss_ratio());
}
TEST_F(CacheSimulatorTest, PrioritizedCacheSimulator) {
  const BlockCacheTraceRecord& access = GenerateGetRecord(kGetId);
  std::shared_ptr<Cache> lru_cache = NewLRUCache(
      /*capacity=*/kCacheSize, /*num_shard_bits=*/1,
      /*strict_capacity_limit=*/false, /*high_pri_pool_ratio=*/0);
  std::unique_ptr<PrioritizedCacheSimulator> simulator(
      new PrioritizedCacheSimulator(nullptr, lru_cache));
  // Miss then hit on the same block.
  simulator->Access(access);
  simulator->Access(access);
  ASSERT_EQ(2, simulator->total_accesses());
  ASSERT_EQ(50, simulator->miss_ratio());
  // The block should now reside in the simulated cache.
  Cache::Handle* handle = lru_cache->Lookup(access.block_key);
  ASSERT_NE(nullptr, handle);
  lru_cache->Release(handle);
}
TEST_F(CacheSimulatorTest, GhostPrioritizedCacheSimulator) {
  const BlockCacheTraceRecord& access = GenerateGetRecord(kGetId);
  std::unique_ptr<GhostCache> ghost(new GhostCache(NewLRUCache(
      /*capacity=*/kGhostCacheSize, /*num_shard_bits=*/1,
      /*strict_capacity_limit=*/false, /*high_pri_pool_ratio=*/0)));
  std::unique_ptr<PrioritizedCacheSimulator> simulator(
      new PrioritizedCacheSimulator(
          std::move(ghost),
          NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1,
                      /*strict_capacity_limit=*/false,
                      /*high_pri_pool_ratio=*/0)));
  // The ghost cache admits an entry only on its second access, so the block
  // is not inserted on the first access and both accesses miss.
  simulator->Access(access);
  simulator->Access(access);
  ASSERT_EQ(2, simulator->total_accesses());
  ASSERT_EQ(100, simulator->miss_ratio());
}
TEST_F(CacheSimulatorTest, HybridRowBlockCacheSimulator) {
  uint64_t block_id = 100;
  BlockCacheTraceRecord first_get = GenerateGetRecord(kGetId);
  // The second get looks up the same referenced key as the first get, but
  // the key is not found in the blocks it reads.
  BlockCacheTraceRecord second_get = GenerateGetRecord(kGetId + 1);
  second_get.referenced_data_size = 0;
  second_get.referenced_key_exist_in_block = Boolean::kFalse;
  second_get.referenced_key = kRefKeyPrefix + std::to_string(kGetId);
  // We didn't find the referenced key in the third get; it references a
  // different key and has no referenced data.
  // (Note: the original test set referenced_data_size and
  // referenced_key_exist_in_block twice with identical values; the
  // duplicates are removed here.)
  BlockCacheTraceRecord third_get = GenerateGetRecord(kGetId + 2);
  third_get.referenced_data_size = 0;
  third_get.referenced_key_exist_in_block = Boolean::kFalse;
  third_get.referenced_key = kRefKeyPrefix + "third_get";
  std::shared_ptr<Cache> sim_cache =
      NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1,
                  /*strict_capacity_limit=*/false,
                  /*high_pri_pool_ratio=*/0);
  std::unique_ptr<HybridRowBlockCacheSimulator> cache_simulator(
      new HybridRowBlockCacheSimulator(
          nullptr, sim_cache, /*insert_blocks_row_kvpair_misses=*/true));
  // The first get request accesses 10 blocks. We should only report 10 accesses
  // and 100% miss.
  for (uint32_t i = 0; i < 10; i++) {
    first_get.block_key = kBlockKeyPrefix + std::to_string(block_id);
    cache_simulator->Access(first_get);
    block_id++;
  }
  ASSERT_EQ(10, cache_simulator->total_accesses());
  ASSERT_EQ(100, cache_simulator->miss_ratio());
  ASSERT_EQ(10, cache_simulator->user_accesses());
  ASSERT_EQ(100, cache_simulator->user_miss_ratio());
  // The referenced row (keyed by "<sst_fd_number>_<user key>") should have
  // been inserted on the miss.
  auto handle =
      sim_cache->Lookup(ExtractUserKey(std::to_string(first_get.sst_fd_number) +
                                       "_" + first_get.referenced_key));
  ASSERT_NE(nullptr, handle);
  sim_cache->Release(handle);
  // All 10 blocks were inserted since insert_blocks_row_kvpair_misses=true.
  for (uint32_t i = 100; i < block_id; i++) {
    handle = sim_cache->Lookup(kBlockKeyPrefix + std::to_string(i));
    ASSERT_NE(nullptr, handle);
    sim_cache->Release(handle);
  }
  // The second get request accesses the same key. We should report 15
  // access and 66% miss, 10 misses with 15 accesses.
  // We do not consider these 5 block lookups as misses since the row hits the
  // cache.
  for (uint32_t i = 0; i < 5; i++) {
    second_get.block_key = kBlockKeyPrefix + std::to_string(block_id);
    cache_simulator->Access(second_get);
    block_id++;
  }
  ASSERT_EQ(15, cache_simulator->total_accesses());
  ASSERT_EQ(66, static_cast<uint64_t>(cache_simulator->miss_ratio()));
  ASSERT_EQ(15, cache_simulator->user_accesses());
  ASSERT_EQ(66, static_cast<uint64_t>(cache_simulator->user_miss_ratio()));
  handle = sim_cache->Lookup(std::to_string(second_get.sst_fd_number) + "_" +
                             second_get.referenced_key);
  ASSERT_NE(nullptr, handle);
  sim_cache->Release(handle);
  // Blocks 100-109 were inserted by the first get; blocks 110-114 accessed
  // by the second get were not inserted because the row hit the cache.
  for (uint32_t i = 100; i < block_id; i++) {
    handle = sim_cache->Lookup(kBlockKeyPrefix + std::to_string(i));
    if (i < 110) {
      ASSERT_NE(nullptr, handle) << i;
      sim_cache->Release(handle);
    } else {
      ASSERT_EQ(nullptr, handle) << i;
    }
  }
  // The third get on a different key and does not have a size.
  // This key should not be inserted into the cache.
  for (uint32_t i = 0; i < 5; i++) {
    third_get.block_key = kBlockKeyPrefix + std::to_string(block_id);
    cache_simulator->Access(third_get);
    block_id++;
  }
  ASSERT_EQ(20, cache_simulator->total_accesses());
  ASSERT_EQ(75, static_cast<uint64_t>(cache_simulator->miss_ratio()));
  ASSERT_EQ(20, cache_simulator->user_accesses());
  ASSERT_EQ(75, static_cast<uint64_t>(cache_simulator->user_miss_ratio()));
  // Assert that the third key is not inserted into the cache.
  handle = sim_cache->Lookup(std::to_string(third_get.sst_fd_number) + "_" +
                             third_get.referenced_key);
  ASSERT_EQ(nullptr, handle);
  // Blocks 115-119 accessed by the third get were inserted (its blocks
  // missed and insert_blocks_row_kvpair_misses=true); blocks 110-114 are
  // still absent.
  for (uint32_t i = 100; i < block_id; i++) {
    if (i < 110 || i >= 115) {
      handle = sim_cache->Lookup(kBlockKeyPrefix + std::to_string(i));
      ASSERT_NE(nullptr, handle) << i;
      sim_cache->Release(handle);
    } else {
      handle = sim_cache->Lookup(kBlockKeyPrefix + std::to_string(i));
      ASSERT_EQ(nullptr, handle) << i;
    }
  }
}
TEST_F(CacheSimulatorTest, HybridRowBlockNoInsertCacheSimulator) {
  uint64_t block_id = 100;
  BlockCacheTraceRecord get_record = GenerateGetRecord(kGetId);
  std::shared_ptr<Cache> lru_cache = NewLRUCache(
      /*capacity=*/kCacheSize, /*num_shard_bits=*/1,
      /*strict_capacity_limit=*/false, /*high_pri_pool_ratio=*/0);
  std::unique_ptr<HybridRowBlockCacheSimulator> simulator(
      new HybridRowBlockCacheSimulator(
          nullptr, lru_cache, /*insert_blocks_row_kvpair_misses=*/false));
  // One get request that accesses 9 blocks.
  for (uint32_t i = 0; i < 9; i++) {
    get_record.block_key = kBlockKeyPrefix + std::to_string(block_id);
    simulator->Access(get_record);
    block_id++;
  }
  // The referenced row should be present in the cache.
  auto handle = lru_cache->Lookup(
      ExtractUserKey(std::to_string(get_record.sst_fd_number) + "_" +
                     get_record.referenced_key));
  ASSERT_NE(nullptr, handle);
  lru_cache->Release(handle);
  // All blocks are missing from the cache since insert_blocks_row_kvpair_misses
  // is set to false.
  for (uint32_t i = 100; i < block_id; i++) {
    handle = lru_cache->Lookup(kBlockKeyPrefix + std::to_string(i));
    ASSERT_EQ(nullptr, handle);
  }
}
TEST_F(CacheSimulatorTest, GhostHybridRowBlockCacheSimulator) {
  std::unique_ptr<GhostCache> ghost(new GhostCache(NewLRUCache(
      /*capacity=*/kGhostCacheSize, /*num_shard_bits=*/1,
      /*strict_capacity_limit=*/false, /*high_pri_pool_ratio=*/0)));
  const BlockCacheTraceRecord& first_get = GenerateGetRecord(kGetId);
  const BlockCacheTraceRecord& second_get = GenerateGetRecord(kGetId + 1);
  const BlockCacheTraceRecord& third_get = GenerateGetRecord(kGetId + 2);
  std::unique_ptr<HybridRowBlockCacheSimulator> simulator(
      new HybridRowBlockCacheSimulator(
          std::move(ghost),
          NewLRUCache(/*capacity=*/kCacheSize, /*num_shard_bits=*/1,
                      /*strict_capacity_limit=*/false,
                      /*high_pri_pool_ratio=*/0),
          /*insert_blocks_row_kvpair_misses=*/false));
  // Two get requests access the same key. The ghost cache keeps the row out
  // on its first access, so both requests miss.
  simulator->Access(first_get);
  simulator->Access(second_get);
  ASSERT_EQ(2, simulator->total_accesses());
  ASSERT_EQ(100, simulator->miss_ratio());
  ASSERT_EQ(2, simulator->user_accesses());
  ASSERT_EQ(100, simulator->user_miss_ratio());
  // We insert the key-value pair upon the second get request. A third get
  // request should observe a hit.
  for (uint32_t i = 0; i < 10; i++) {
    simulator->Access(third_get);
  }
  ASSERT_EQ(12, simulator->total_accesses());
  ASSERT_EQ(16, static_cast<uint64_t>(simulator->miss_ratio()));
  ASSERT_EQ(12, simulator->user_accesses());
  ASSERT_EQ(16, static_cast<uint64_t>(simulator->user_miss_ratio()));
}
} // namespace rocksdb
// Test entry point: initialize gtest and run every registered test case.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int rc = RUN_ALL_TESTS();
  return rc;
}