Check for and disallow shared key space in block caches (#9172)
Summary: We have three layers of block cache that often use the same key but map to different physical data: * BlockBasedTableOptions::block_cache * BlockBasedTableOptions::block_cache_compressed * BlockBasedTableOptions::persistent_cache If any two of these happen to share an underlying implementation and key space (insertion into one shows up in another), then memory safety is broken. The simplest case is block_cache == block_cache_compressed. (Credit mrambacher for asking about this case in a review.) With this change, we explicitly check for overlap and preemptively and safely fail with a Status code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9172 Test Plan: test added. Crashes without new check Reviewed By: anand1976 Differential Revision: D32465659 Pulled By: pdillinger fbshipit-source-id: 3876b45b6dce6167e5a7a642725ddc86b96f8e40
This commit is contained in:
parent
28f54e71f3
commit
f8c685c4fc
@ -15,6 +15,7 @@
|
|||||||
* Added input sanitization on negative bytes passed into `GenericRateLimiter::Request`.
|
* Added input sanitization on negative bytes passed into `GenericRateLimiter::Request`.
|
||||||
* Fixed an assertion failure in CompactionIterator when write-prepared transaction is used. We prove that certain operations can lead to a Delete being followed by a SingleDelete (same user key). We can drop the SingleDelete.
|
* Fixed an assertion failure in CompactionIterator when write-prepared transaction is used. We prove that certain operations can lead to a Delete being followed by a SingleDelete (same user key). We can drop the SingleDelete.
|
||||||
* Fixed a bug of timestamp-based GC which can cause all versions of a key under full_history_ts_low to be dropped. This bug will be triggered when some of the ikeys' timestamps are lower than full_history_ts_low, while others are newer.
|
* Fixed a bug of timestamp-based GC which can cause all versions of a key under full_history_ts_low to be dropped. This bug will be triggered when some of the ikeys' timestamps are lower than full_history_ts_low, while others are newer.
|
||||||
|
* Explicitly check for and disallow the `BlockBasedTableOptions` if insertion into one of {`block_cache`, `block_cache_compressed`, `persistent_cache`} can show up in another of these. (RocksDB expects to be able to use the same key for different physical data among tiers.)
|
||||||
|
|
||||||
### Behavior Changes
|
### Behavior Changes
|
||||||
* `NUM_FILES_IN_SINGLE_COMPACTION` was only counting the first input level files, now it's including all input files.
|
* `NUM_FILES_IN_SINGLE_COMPACTION` was only counting the first input level files, now it's including all input files.
|
||||||
|
12
cache/cache_entry_roles.h
vendored
12
cache/cache_entry_roles.h
vendored
@ -7,6 +7,8 @@
|
|||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
#include <type_traits>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "rocksdb/cache.h"
|
#include "rocksdb/cache.h"
|
||||||
@ -90,7 +92,9 @@ struct RegisteredDeleter {
|
|||||||
// These have global linkage to help ensure compiler optimizations do not
|
// These have global linkage to help ensure compiler optimizations do not
|
||||||
// break uniqueness for each <T,R>
|
// break uniqueness for each <T,R>
|
||||||
static void Delete(const Slice& /* key */, void* value) {
|
static void Delete(const Slice& /* key */, void* value) {
|
||||||
delete static_cast<T*>(value);
|
// Supports T == Something[], unlike delete operator
|
||||||
|
std::default_delete<T>()(
|
||||||
|
static_cast<typename std::remove_extent<T>::type*>(value));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -98,9 +102,9 @@ template <CacheEntryRole R>
|
|||||||
struct RegisteredNoopDeleter {
|
struct RegisteredNoopDeleter {
|
||||||
RegisteredNoopDeleter() { RegisterCacheDeleterRole(Delete, R); }
|
RegisteredNoopDeleter() { RegisterCacheDeleterRole(Delete, R); }
|
||||||
|
|
||||||
static void Delete(const Slice& /* key */, void* value) {
|
static void Delete(const Slice& /* key */, void* /* value */) {
|
||||||
(void)value;
|
// Here was `assert(value == nullptr);` but we can also put pointers
|
||||||
assert(value == nullptr);
|
// to static data in Cache, for testing at least.
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -336,6 +336,143 @@ TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) {
|
|||||||
CheckCacheCounters(options, 1, 0, 1, 0);
|
CheckCacheCounters(options, 1, 0, 1, 0);
|
||||||
CheckCompressedCacheCounters(options, 0, 1, 0, 0);
|
CheckCompressedCacheCounters(options, 0, 1, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
class PersistentCacheFromCache : public PersistentCache {
|
||||||
|
public:
|
||||||
|
PersistentCacheFromCache(std::shared_ptr<Cache> cache, bool read_only)
|
||||||
|
: cache_(cache), read_only_(read_only) {}
|
||||||
|
|
||||||
|
Status Insert(const Slice& key, const char* data,
|
||||||
|
const size_t size) override {
|
||||||
|
if (read_only_) {
|
||||||
|
return Status::NotSupported();
|
||||||
|
}
|
||||||
|
std::unique_ptr<char[]> copy{new char[size]};
|
||||||
|
std::copy_n(data, size, copy.get());
|
||||||
|
Status s = cache_->Insert(
|
||||||
|
key, copy.get(), size,
|
||||||
|
GetCacheEntryDeleterForRole<char[], CacheEntryRole::kMisc>());
|
||||||
|
if (s.ok()) {
|
||||||
|
copy.release();
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
Status Lookup(const Slice& key, std::unique_ptr<char[]>* data,
|
||||||
|
size_t* size) override {
|
||||||
|
auto handle = cache_->Lookup(key);
|
||||||
|
if (handle) {
|
||||||
|
char* ptr = static_cast<char*>(cache_->Value(handle));
|
||||||
|
*size = cache_->GetCharge(handle);
|
||||||
|
data->reset(new char[*size]);
|
||||||
|
std::copy_n(ptr, *size, data->get());
|
||||||
|
cache_->Release(handle);
|
||||||
|
return Status::OK();
|
||||||
|
} else {
|
||||||
|
return Status::NotFound();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsCompressed() override { return false; }
|
||||||
|
|
||||||
|
StatsType Stats() override { return StatsType(); }
|
||||||
|
|
||||||
|
std::string GetPrintableOptions() const override { return ""; }
|
||||||
|
|
||||||
|
uint64_t NewId() override { return cache_->NewId(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<Cache> cache_;
|
||||||
|
bool read_only_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ReadOnlyCacheWrapper : public CacheWrapper {
|
||||||
|
using CacheWrapper::CacheWrapper;
|
||||||
|
|
||||||
|
using Cache::Insert;
|
||||||
|
Status Insert(const Slice& /*key*/, void* /*value*/, size_t /*charge*/,
|
||||||
|
void (*)(const Slice& key, void* value) /*deleter*/,
|
||||||
|
Handle** /*handle*/, Priority /*priority*/) override {
|
||||||
|
return Status::NotSupported();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
TEST_F(DBBlockCacheTest, TestWithSameCompressed) {
|
||||||
|
auto table_options = GetTableOptions();
|
||||||
|
auto options = GetOptions(table_options);
|
||||||
|
InitTable(options);
|
||||||
|
|
||||||
|
std::shared_ptr<Cache> rw_cache{NewLRUCache(1000000)};
|
||||||
|
std::shared_ptr<PersistentCacheFromCache> rw_pcache{
|
||||||
|
new PersistentCacheFromCache(rw_cache, /*read_only*/ false)};
|
||||||
|
// Exercise some obscure behavior with read-only wrappers
|
||||||
|
std::shared_ptr<Cache> ro_cache{new ReadOnlyCacheWrapper(rw_cache)};
|
||||||
|
std::shared_ptr<PersistentCacheFromCache> ro_pcache{
|
||||||
|
new PersistentCacheFromCache(rw_cache, /*read_only*/ true)};
|
||||||
|
|
||||||
|
// Simple same pointer
|
||||||
|
table_options.block_cache = rw_cache;
|
||||||
|
table_options.block_cache_compressed = rw_cache;
|
||||||
|
table_options.persistent_cache.reset();
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ASSERT_EQ(TryReopen(options).ToString(),
|
||||||
|
"Invalid argument: block_cache same as block_cache_compressed not "
|
||||||
|
"currently supported, and would be bad for performance anyway");
|
||||||
|
|
||||||
|
// Other cases
|
||||||
|
table_options.block_cache = ro_cache;
|
||||||
|
table_options.block_cache_compressed = rw_cache;
|
||||||
|
table_options.persistent_cache.reset();
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ASSERT_EQ(TryReopen(options).ToString(),
|
||||||
|
"Invalid argument: block_cache and block_cache_compressed share "
|
||||||
|
"the same key space, which is not supported");
|
||||||
|
|
||||||
|
table_options.block_cache = rw_cache;
|
||||||
|
table_options.block_cache_compressed = ro_cache;
|
||||||
|
table_options.persistent_cache.reset();
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ASSERT_EQ(TryReopen(options).ToString(),
|
||||||
|
"Invalid argument: block_cache_compressed and block_cache share "
|
||||||
|
"the same key space, which is not supported");
|
||||||
|
|
||||||
|
table_options.block_cache = ro_cache;
|
||||||
|
table_options.block_cache_compressed.reset();
|
||||||
|
table_options.persistent_cache = rw_pcache;
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ASSERT_EQ(TryReopen(options).ToString(),
|
||||||
|
"Invalid argument: block_cache and persistent_cache share the same "
|
||||||
|
"key space, which is not supported");
|
||||||
|
|
||||||
|
table_options.block_cache = rw_cache;
|
||||||
|
table_options.block_cache_compressed.reset();
|
||||||
|
table_options.persistent_cache = ro_pcache;
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ASSERT_EQ(TryReopen(options).ToString(),
|
||||||
|
"Invalid argument: persistent_cache and block_cache share the same "
|
||||||
|
"key space, which is not supported");
|
||||||
|
|
||||||
|
table_options.block_cache.reset();
|
||||||
|
table_options.no_block_cache = true;
|
||||||
|
table_options.block_cache_compressed = ro_cache;
|
||||||
|
table_options.persistent_cache = rw_pcache;
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ASSERT_EQ(TryReopen(options).ToString(),
|
||||||
|
"Invalid argument: block_cache_compressed and persistent_cache "
|
||||||
|
"share the same key space, which is not supported");
|
||||||
|
|
||||||
|
table_options.block_cache.reset();
|
||||||
|
table_options.no_block_cache = true;
|
||||||
|
table_options.block_cache_compressed = rw_cache;
|
||||||
|
table_options.persistent_cache = ro_pcache;
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
ASSERT_EQ(TryReopen(options).ToString(),
|
||||||
|
"Invalid argument: persistent_cache and block_cache_compressed "
|
||||||
|
"share the same key space, which is not supported");
|
||||||
|
}
|
||||||
#endif // SNAPPY
|
#endif // SNAPPY
|
||||||
|
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
|
@ -31,7 +31,7 @@ class PersistentCache {
|
|||||||
// Insert to page cache
|
// Insert to page cache
|
||||||
//
|
//
|
||||||
// page_key Identifier to identify a page uniquely across restarts
|
// page_key Identifier to identify a page uniquely across restarts
|
||||||
// data Page data
|
// data Page data to copy (caller retains ownership)
|
||||||
// size Size of the page
|
// size Size of the page
|
||||||
virtual Status Insert(const Slice& key, const char* data,
|
virtual Status Insert(const Slice& key, const char* data,
|
||||||
const size_t size) = 0;
|
const size_t size) = 0;
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "cache/cache_entry_roles.h"
|
||||||
#include "logging/logging.h"
|
#include "logging/logging.h"
|
||||||
#include "options/options_helper.h"
|
#include "options/options_helper.h"
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
@ -23,6 +24,7 @@
|
|||||||
#include "rocksdb/filter_policy.h"
|
#include "rocksdb/filter_policy.h"
|
||||||
#include "rocksdb/flush_block_policy.h"
|
#include "rocksdb/flush_block_policy.h"
|
||||||
#include "rocksdb/rocksdb_namespace.h"
|
#include "rocksdb/rocksdb_namespace.h"
|
||||||
|
#include "rocksdb/table.h"
|
||||||
#include "rocksdb/utilities/options_type.h"
|
#include "rocksdb/utilities/options_type.h"
|
||||||
#include "table/block_based/block_based_table_builder.h"
|
#include "table/block_based/block_based_table_builder.h"
|
||||||
#include "table/block_based/block_based_table_reader.h"
|
#include "table/block_based/block_based_table_reader.h"
|
||||||
@ -488,6 +490,116 @@ Status BlockBasedTableFactory::PrepareOptions(const ConfigOptions& opts) {
|
|||||||
return TableFactory::PrepareOptions(opts);
|
return TableFactory::PrepareOptions(opts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// Different cache kinds use the same keys for physically different values, so
|
||||||
|
// they must not share an underlying key space with each other.
|
||||||
|
Status CheckCacheOptionCompatibility(const BlockBasedTableOptions& bbto) {
|
||||||
|
int cache_count = (bbto.block_cache != nullptr) +
|
||||||
|
(bbto.block_cache_compressed != nullptr) +
|
||||||
|
(bbto.persistent_cache != nullptr);
|
||||||
|
if (cache_count <= 1) {
|
||||||
|
// Nothing to share / overlap
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simple pointer equality
|
||||||
|
if (bbto.block_cache == bbto.block_cache_compressed) {
|
||||||
|
return Status::InvalidArgument(
|
||||||
|
"block_cache same as block_cache_compressed not currently supported, "
|
||||||
|
"and would be bad for performance anyway");
|
||||||
|
}
|
||||||
|
|
||||||
|
// More complex test of shared key space, in case the instances are wrappers
|
||||||
|
// for some shared underlying cache.
|
||||||
|
std::string sentinel_key(size_t{1}, '\0');
|
||||||
|
static char kRegularBlockCacheMarker = 'b';
|
||||||
|
static char kCompressedBlockCacheMarker = 'c';
|
||||||
|
static char kPersistentCacheMarker = 'p';
|
||||||
|
if (bbto.block_cache) {
|
||||||
|
bbto.block_cache
|
||||||
|
->Insert(Slice(sentinel_key), &kRegularBlockCacheMarker, 1,
|
||||||
|
GetNoopDeleterForRole<CacheEntryRole::kMisc>())
|
||||||
|
.PermitUncheckedError();
|
||||||
|
}
|
||||||
|
if (bbto.block_cache_compressed) {
|
||||||
|
bbto.block_cache_compressed
|
||||||
|
->Insert(Slice(sentinel_key), &kCompressedBlockCacheMarker, 1,
|
||||||
|
GetNoopDeleterForRole<CacheEntryRole::kMisc>())
|
||||||
|
.PermitUncheckedError();
|
||||||
|
}
|
||||||
|
if (bbto.persistent_cache) {
|
||||||
|
// Note: persistent cache copies the data, not keeping the pointer
|
||||||
|
bbto.persistent_cache
|
||||||
|
->Insert(Slice(sentinel_key), &kPersistentCacheMarker, 1)
|
||||||
|
.PermitUncheckedError();
|
||||||
|
}
|
||||||
|
// If we get something different from what we inserted, that indicates
|
||||||
|
// dangerously overlapping key spaces.
|
||||||
|
if (bbto.block_cache) {
|
||||||
|
auto handle = bbto.block_cache->Lookup(Slice(sentinel_key));
|
||||||
|
if (handle) {
|
||||||
|
auto v = static_cast<char*>(bbto.block_cache->Value(handle));
|
||||||
|
char c = *v;
|
||||||
|
bbto.block_cache->Release(handle);
|
||||||
|
if (v == &kCompressedBlockCacheMarker) {
|
||||||
|
return Status::InvalidArgument(
|
||||||
|
"block_cache and block_cache_compressed share the same key space, "
|
||||||
|
"which is not supported");
|
||||||
|
} else if (c == kPersistentCacheMarker) {
|
||||||
|
return Status::InvalidArgument(
|
||||||
|
"block_cache and persistent_cache share the same key space, "
|
||||||
|
"which is not supported");
|
||||||
|
} else if (v != &kRegularBlockCacheMarker) {
|
||||||
|
return Status::Corruption("Unexpected mutation to block_cache");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bbto.block_cache_compressed) {
|
||||||
|
auto handle = bbto.block_cache_compressed->Lookup(Slice(sentinel_key));
|
||||||
|
if (handle) {
|
||||||
|
auto v = static_cast<char*>(bbto.block_cache_compressed->Value(handle));
|
||||||
|
char c = *v;
|
||||||
|
bbto.block_cache_compressed->Release(handle);
|
||||||
|
if (v == &kRegularBlockCacheMarker) {
|
||||||
|
return Status::InvalidArgument(
|
||||||
|
"block_cache_compressed and block_cache share the same key space, "
|
||||||
|
"which is not supported");
|
||||||
|
} else if (c == kPersistentCacheMarker) {
|
||||||
|
return Status::InvalidArgument(
|
||||||
|
"block_cache_compressed and persistent_cache share the same key "
|
||||||
|
"space, "
|
||||||
|
"which is not supported");
|
||||||
|
} else if (v != &kCompressedBlockCacheMarker) {
|
||||||
|
return Status::Corruption(
|
||||||
|
"Unexpected mutation to block_cache_compressed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bbto.persistent_cache) {
|
||||||
|
std::unique_ptr<char[]> data;
|
||||||
|
size_t size = 0;
|
||||||
|
bbto.persistent_cache->Lookup(Slice(sentinel_key), &data, &size)
|
||||||
|
.PermitUncheckedError();
|
||||||
|
if (data && size > 0) {
|
||||||
|
if (data[0] == kRegularBlockCacheMarker) {
|
||||||
|
return Status::InvalidArgument(
|
||||||
|
"persistent_cache and block_cache share the same key space, "
|
||||||
|
"which is not supported");
|
||||||
|
} else if (data[0] == kCompressedBlockCacheMarker) {
|
||||||
|
return Status::InvalidArgument(
|
||||||
|
"persistent_cache and block_cache_compressed share the same key "
|
||||||
|
"space, "
|
||||||
|
"which is not supported");
|
||||||
|
} else if (data[0] != kPersistentCacheMarker) {
|
||||||
|
return Status::Corruption("Unexpected mutation to persistent_cache");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
Status BlockBasedTableFactory::NewTableReader(
|
Status BlockBasedTableFactory::NewTableReader(
|
||||||
const ReadOptions& ro, const TableReaderOptions& table_reader_options,
|
const ReadOptions& ro, const TableReaderOptions& table_reader_options,
|
||||||
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
|
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
|
||||||
@ -566,6 +678,12 @@ Status BlockBasedTableFactory::ValidateOptions(
|
|||||||
"max_successive_merges larger than 0 is currently inconsistent with "
|
"max_successive_merges larger than 0 is currently inconsistent with "
|
||||||
"unordered_write");
|
"unordered_write");
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
Status s = CheckCacheOptionCompatibility(table_options_);
|
||||||
|
if (!s.ok()) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
std::string garbage;
|
std::string garbage;
|
||||||
if (!SerializeEnum<ChecksumType>(checksum_type_string_map,
|
if (!SerializeEnum<ChecksumType>(checksum_type_string_map,
|
||||||
table_options_.checksum, &garbage)) {
|
table_options_.checksum, &garbage)) {
|
||||||
|
Loading…
Reference in New Issue
Block a user