Check for and disallow shared key space in block caches (#9172)

Summary:
We have three layers of block cache that often use the same key
but map to different physical data:
* BlockBasedTableOptions::block_cache
* BlockBasedTableOptions::block_cache_compressed
* BlockBasedTableOptions::persistent_cache

If any two of these happen to share an underlying implementation and key
space (insertion into one shows up in another), then memory safety is
broken. The simplest case is block_cache == block_cache_compressed.
(Credit mrambacher for asking about this case in a review.)

With this change, we explicitly check for overlap and preemptively and
safely fail with a Status code.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9172

Test Plan: test added. Crashes without new check

Reviewed By: anand1976

Differential Revision: D32465659

Pulled By: pdillinger

fbshipit-source-id: 3876b45b6dce6167e5a7a642725ddc86b96f8e40
This commit is contained in:
Peter Dillinger 2021-11-16 11:14:02 -08:00 committed by Facebook GitHub Bot
parent 28f54e71f3
commit f8c685c4fc
5 changed files with 265 additions and 5 deletions

View File

@ -15,6 +15,7 @@
* Added input sanitization on negative bytes passed into `GenericRateLimiter::Request`.
* Fixed an assertion failure in CompactionIterator when write-prepared transaction is used. We prove that certain operations can lead to a Delete being followed by a SingleDelete (same user key). We can drop the SingleDelete.
* Fixed a bug of timestamp-based GC which can cause all versions of a key under full_history_ts_low to be dropped. This bug will be triggered when some of the ikeys' timestamps are lower than full_history_ts_low, while others are newer.
* Explicitly check for and disallow the `BlockBasedTableOptions` if insertion into one of {`block_cache`, `block_cache_compressed`, `persistent_cache`} can show up in another of these. (RocksDB expects to be able to use the same key for different physical data among tiers.)
### Behavior Changes
* `NUM_FILES_IN_SINGLE_COMPACTION` was only counting the first input level files, now it's including all input files.

View File

@ -7,6 +7,8 @@
#include <array>
#include <cstdint>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include "rocksdb/cache.h"
@ -90,7 +92,9 @@ struct RegisteredDeleter {
// These have global linkage to help ensure compiler optimizations do not
// break uniqueness for each <T,R>
static void Delete(const Slice& /* key */, void* value) {
delete static_cast<T*>(value);
// Supports T == Something[], unlike delete operator
std::default_delete<T>()(
static_cast<typename std::remove_extent<T>::type*>(value));
}
};
@ -98,9 +102,9 @@ template <CacheEntryRole R>
struct RegisteredNoopDeleter {
RegisteredNoopDeleter() { RegisterCacheDeleterRole(Delete, R); }
static void Delete(const Slice& /* key */, void* value) {
(void)value;
assert(value == nullptr);
static void Delete(const Slice& /* key */, void* /* value */) {
// Here was `assert(value == nullptr);` but we can also put pointers
// to static data in Cache, for testing at least.
}
};

View File

@ -336,6 +336,143 @@ TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) {
CheckCacheCounters(options, 1, 0, 1, 0);
CheckCompressedCacheCounters(options, 0, 1, 0, 0);
}
namespace {
class PersistentCacheFromCache : public PersistentCache {
public:
PersistentCacheFromCache(std::shared_ptr<Cache> cache, bool read_only)
: cache_(cache), read_only_(read_only) {}
Status Insert(const Slice& key, const char* data,
const size_t size) override {
if (read_only_) {
return Status::NotSupported();
}
std::unique_ptr<char[]> copy{new char[size]};
std::copy_n(data, size, copy.get());
Status s = cache_->Insert(
key, copy.get(), size,
GetCacheEntryDeleterForRole<char[], CacheEntryRole::kMisc>());
if (s.ok()) {
copy.release();
}
return s;
}
Status Lookup(const Slice& key, std::unique_ptr<char[]>* data,
size_t* size) override {
auto handle = cache_->Lookup(key);
if (handle) {
char* ptr = static_cast<char*>(cache_->Value(handle));
*size = cache_->GetCharge(handle);
data->reset(new char[*size]);
std::copy_n(ptr, *size, data->get());
cache_->Release(handle);
return Status::OK();
} else {
return Status::NotFound();
}
}
bool IsCompressed() override { return false; }
StatsType Stats() override { return StatsType(); }
std::string GetPrintableOptions() const override { return ""; }
uint64_t NewId() override { return cache_->NewId(); }
private:
std::shared_ptr<Cache> cache_;
bool read_only_;
};
class ReadOnlyCacheWrapper : public CacheWrapper {
using CacheWrapper::CacheWrapper;
using Cache::Insert;
Status Insert(const Slice& /*key*/, void* /*value*/, size_t /*charge*/,
void (*)(const Slice& key, void* value) /*deleter*/,
Handle** /*handle*/, Priority /*priority*/) override {
return Status::NotSupported();
}
};
} // namespace
TEST_F(DBBlockCacheTest, TestWithSameCompressed) {
auto table_options = GetTableOptions();
auto options = GetOptions(table_options);
InitTable(options);
std::shared_ptr<Cache> rw_cache{NewLRUCache(1000000)};
std::shared_ptr<PersistentCacheFromCache> rw_pcache{
new PersistentCacheFromCache(rw_cache, /*read_only*/ false)};
// Exercise some obscure behavior with read-only wrappers
std::shared_ptr<Cache> ro_cache{new ReadOnlyCacheWrapper(rw_cache)};
std::shared_ptr<PersistentCacheFromCache> ro_pcache{
new PersistentCacheFromCache(rw_cache, /*read_only*/ true)};
// Simple same pointer
table_options.block_cache = rw_cache;
table_options.block_cache_compressed = rw_cache;
table_options.persistent_cache.reset();
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ASSERT_EQ(TryReopen(options).ToString(),
"Invalid argument: block_cache same as block_cache_compressed not "
"currently supported, and would be bad for performance anyway");
// Other cases
table_options.block_cache = ro_cache;
table_options.block_cache_compressed = rw_cache;
table_options.persistent_cache.reset();
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ASSERT_EQ(TryReopen(options).ToString(),
"Invalid argument: block_cache and block_cache_compressed share "
"the same key space, which is not supported");
table_options.block_cache = rw_cache;
table_options.block_cache_compressed = ro_cache;
table_options.persistent_cache.reset();
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ASSERT_EQ(TryReopen(options).ToString(),
"Invalid argument: block_cache_compressed and block_cache share "
"the same key space, which is not supported");
table_options.block_cache = ro_cache;
table_options.block_cache_compressed.reset();
table_options.persistent_cache = rw_pcache;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ASSERT_EQ(TryReopen(options).ToString(),
"Invalid argument: block_cache and persistent_cache share the same "
"key space, which is not supported");
table_options.block_cache = rw_cache;
table_options.block_cache_compressed.reset();
table_options.persistent_cache = ro_pcache;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ASSERT_EQ(TryReopen(options).ToString(),
"Invalid argument: persistent_cache and block_cache share the same "
"key space, which is not supported");
table_options.block_cache.reset();
table_options.no_block_cache = true;
table_options.block_cache_compressed = ro_cache;
table_options.persistent_cache = rw_pcache;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ASSERT_EQ(TryReopen(options).ToString(),
"Invalid argument: block_cache_compressed and persistent_cache "
"share the same key space, which is not supported");
table_options.block_cache.reset();
table_options.no_block_cache = true;
table_options.block_cache_compressed = rw_cache;
table_options.persistent_cache = ro_pcache;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ASSERT_EQ(TryReopen(options).ToString(),
"Invalid argument: persistent_cache and block_cache_compressed "
"share the same key space, which is not supported");
}
#endif // SNAPPY
#ifndef ROCKSDB_LITE

View File

@ -31,7 +31,7 @@ class PersistentCache {
// Insert to page cache
//
// page_key Identifier to identify a page uniquely across restarts
// data Page data
// data Page data to copy (caller retains ownership)
// size Size of the page
virtual Status Insert(const Slice& key, const char* data,
const size_t size) = 0;

View File

@ -15,6 +15,7 @@
#include <memory>
#include <string>
#include "cache/cache_entry_roles.h"
#include "logging/logging.h"
#include "options/options_helper.h"
#include "port/port.h"
@ -23,6 +24,7 @@
#include "rocksdb/filter_policy.h"
#include "rocksdb/flush_block_policy.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/table.h"
#include "rocksdb/utilities/options_type.h"
#include "table/block_based/block_based_table_builder.h"
#include "table/block_based/block_based_table_reader.h"
@ -488,6 +490,116 @@ Status BlockBasedTableFactory::PrepareOptions(const ConfigOptions& opts) {
return TableFactory::PrepareOptions(opts);
}
namespace {
// Different cache kinds use the same keys for physically different values, so
// they must not share an underlying key space with each other.
Status CheckCacheOptionCompatibility(const BlockBasedTableOptions& bbto) {
int cache_count = (bbto.block_cache != nullptr) +
(bbto.block_cache_compressed != nullptr) +
(bbto.persistent_cache != nullptr);
if (cache_count <= 1) {
// Nothing to share / overlap
return Status::OK();
}
// Simple pointer equality
if (bbto.block_cache == bbto.block_cache_compressed) {
return Status::InvalidArgument(
"block_cache same as block_cache_compressed not currently supported, "
"and would be bad for performance anyway");
}
// More complex test of shared key space, in case the instances are wrappers
// for some shared underlying cache.
std::string sentinel_key(size_t{1}, '\0');
static char kRegularBlockCacheMarker = 'b';
static char kCompressedBlockCacheMarker = 'c';
static char kPersistentCacheMarker = 'p';
if (bbto.block_cache) {
bbto.block_cache
->Insert(Slice(sentinel_key), &kRegularBlockCacheMarker, 1,
GetNoopDeleterForRole<CacheEntryRole::kMisc>())
.PermitUncheckedError();
}
if (bbto.block_cache_compressed) {
bbto.block_cache_compressed
->Insert(Slice(sentinel_key), &kCompressedBlockCacheMarker, 1,
GetNoopDeleterForRole<CacheEntryRole::kMisc>())
.PermitUncheckedError();
}
if (bbto.persistent_cache) {
// Note: persistent cache copies the data, not keeping the pointer
bbto.persistent_cache
->Insert(Slice(sentinel_key), &kPersistentCacheMarker, 1)
.PermitUncheckedError();
}
// If we get something different from what we inserted, that indicates
// dangerously overlapping key spaces.
if (bbto.block_cache) {
auto handle = bbto.block_cache->Lookup(Slice(sentinel_key));
if (handle) {
auto v = static_cast<char*>(bbto.block_cache->Value(handle));
char c = *v;
bbto.block_cache->Release(handle);
if (v == &kCompressedBlockCacheMarker) {
return Status::InvalidArgument(
"block_cache and block_cache_compressed share the same key space, "
"which is not supported");
} else if (c == kPersistentCacheMarker) {
return Status::InvalidArgument(
"block_cache and persistent_cache share the same key space, "
"which is not supported");
} else if (v != &kRegularBlockCacheMarker) {
return Status::Corruption("Unexpected mutation to block_cache");
}
}
}
if (bbto.block_cache_compressed) {
auto handle = bbto.block_cache_compressed->Lookup(Slice(sentinel_key));
if (handle) {
auto v = static_cast<char*>(bbto.block_cache_compressed->Value(handle));
char c = *v;
bbto.block_cache_compressed->Release(handle);
if (v == &kRegularBlockCacheMarker) {
return Status::InvalidArgument(
"block_cache_compressed and block_cache share the same key space, "
"which is not supported");
} else if (c == kPersistentCacheMarker) {
return Status::InvalidArgument(
"block_cache_compressed and persistent_cache share the same key "
"space, "
"which is not supported");
} else if (v != &kCompressedBlockCacheMarker) {
return Status::Corruption(
"Unexpected mutation to block_cache_compressed");
}
}
}
if (bbto.persistent_cache) {
std::unique_ptr<char[]> data;
size_t size = 0;
bbto.persistent_cache->Lookup(Slice(sentinel_key), &data, &size)
.PermitUncheckedError();
if (data && size > 0) {
if (data[0] == kRegularBlockCacheMarker) {
return Status::InvalidArgument(
"persistent_cache and block_cache share the same key space, "
"which is not supported");
} else if (data[0] == kCompressedBlockCacheMarker) {
return Status::InvalidArgument(
"persistent_cache and block_cache_compressed share the same key "
"space, "
"which is not supported");
} else if (data[0] != kPersistentCacheMarker) {
return Status::Corruption("Unexpected mutation to persistent_cache");
}
}
}
return Status::OK();
}
} // namespace
Status BlockBasedTableFactory::NewTableReader(
const ReadOptions& ro, const TableReaderOptions& table_reader_options,
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
@ -566,6 +678,12 @@ Status BlockBasedTableFactory::ValidateOptions(
"max_successive_merges larger than 0 is currently inconsistent with "
"unordered_write");
}
{
Status s = CheckCacheOptionCompatibility(table_options_);
if (!s.ok()) {
return s;
}
}
std::string garbage;
if (!SerializeEnum<ChecksumType>(checksum_type_string_map,
table_options_.checksum, &garbage)) {