Stable cache keys using DB session ids in SSTs (#8659)
Summary: Use DB session ids in SST table properties to make cache keys stable across DB re-open and copy / move / restore / etc. These new cache keys are currently only enabled when FileSystem does not provide GetUniqueId. For now, they are typically larger, so slightly less efficient. Relevant to https://github.com/facebook/rocksdb/issues/7405 This change has a minor regression in PersistentCache functionality: metaindex blocks are no longer cached in PersistentCache. Table properties blocks already were not but ideally should be. I didn't spent effort to fix & test these issues because we don't believe PersistentCache is used much if at all and expect SecondaryCache to replace it. (Though PRs are welcome.) FIXME: there is more to be fixed for stable cache keys on external SST files Pull Request resolved: https://github.com/facebook/rocksdb/pull/8659 Test Plan: new unit test added, which fails when disabling new functionality Reviewed By: zhichao-cao Differential Revision: D30297705 Pulled By: pdillinger fbshipit-source-id: e8539a5c8802a79340405629870f2e3fb3822d3a
This commit is contained in:
parent
5de333fd99
commit
a207c27809
@ -2756,6 +2756,11 @@ class DBBasicTestMultiGet : public DBTestBase {
|
||||
EXPECT_OK(dbfull()->Flush(FlushOptions(), handles_[cf]));
|
||||
}
|
||||
}
|
||||
// Clear compressed cache, which is always pre-populated
|
||||
if (compressed_cache_) {
|
||||
compressed_cache_->SetCapacity(0);
|
||||
compressed_cache_->SetCapacity(1048576);
|
||||
}
|
||||
}
|
||||
|
||||
bool CheckValue(int i, const std::string& value) {
|
||||
|
@ -7,6 +7,7 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
#include <cstdlib>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include "cache/cache_entry_roles.h"
|
||||
@ -14,9 +15,11 @@
|
||||
#include "db/column_family.h"
|
||||
#include "db/db_test_util.h"
|
||||
#include "port/stack_trace.h"
|
||||
#include "rocksdb/statistics.h"
|
||||
#include "rocksdb/table.h"
|
||||
#include "util/compression.h"
|
||||
#include "util/random.h"
|
||||
#include "utilities/fault_injection_fs.h"
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
@ -1298,6 +1301,102 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
|
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
// Disable LinkFile so that we can physically copy a DB using Checkpoint.
|
||||
// Disable file GetUniqueId to enable stable cache keys.
|
||||
class StableCacheKeyTestFS : public FaultInjectionTestFS {
|
||||
public:
|
||||
explicit StableCacheKeyTestFS(const std::shared_ptr<FileSystem>& base)
|
||||
: FaultInjectionTestFS(base) {
|
||||
SetFailGetUniqueId(true);
|
||||
}
|
||||
|
||||
virtual ~StableCacheKeyTestFS() {}
|
||||
|
||||
IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
|
||||
IODebugContext*) override {
|
||||
return IOStatus::NotSupported("Disabled");
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(DBBlockCacheTest, StableCacheKeys) {
|
||||
std::shared_ptr<StableCacheKeyTestFS> test_fs{
|
||||
new StableCacheKeyTestFS(env_->GetFileSystem())};
|
||||
std::unique_ptr<CompositeEnvWrapper> test_env{
|
||||
new CompositeEnvWrapper(env_, test_fs)};
|
||||
|
||||
for (bool compressed : {false, true}) {
|
||||
Options options = CurrentOptions();
|
||||
options.create_if_missing = true;
|
||||
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
|
||||
options.env = test_env.get();
|
||||
|
||||
BlockBasedTableOptions table_options;
|
||||
|
||||
std::function<void()> verify_stats;
|
||||
if (compressed) {
|
||||
if (!Snappy_Supported()) {
|
||||
fprintf(stderr, "skipping compressed test, snappy unavailable\n");
|
||||
continue;
|
||||
}
|
||||
options.compression = CompressionType::kSnappyCompression;
|
||||
table_options.no_block_cache = true;
|
||||
table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false);
|
||||
verify_stats = [&options] {
|
||||
ASSERT_EQ(
|
||||
1, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD));
|
||||
};
|
||||
} else {
|
||||
table_options.cache_index_and_filter_blocks = true;
|
||||
table_options.block_cache = NewLRUCache(1 << 25, 0, false);
|
||||
verify_stats = [&options] {
|
||||
ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
|
||||
ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
|
||||
ASSERT_EQ(1,
|
||||
options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
|
||||
};
|
||||
}
|
||||
|
||||
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||
DestroyAndReopen(options);
|
||||
|
||||
ASSERT_OK(Put("key1", "abc"));
|
||||
std::string something_compressible(500U, 'x');
|
||||
ASSERT_OK(Put("key2", something_compressible));
|
||||
ASSERT_OK(Flush());
|
||||
|
||||
ASSERT_EQ(Get("key1"), std::string("abc"));
|
||||
verify_stats();
|
||||
|
||||
// Make sure we can cache hit after re-open
|
||||
Reopen(options);
|
||||
|
||||
ASSERT_EQ(Get("key1"), std::string("abc"));
|
||||
verify_stats();
|
||||
|
||||
// Make sure we can cache hit even on a full copy of the DB. Using
|
||||
// StableCacheKeyTestFS, Checkpoint will resort to full copy not hard link.
|
||||
// (Checkpoint not available in LITE mode to test this.)
|
||||
#ifndef ROCKSDB_LITE
|
||||
auto db_copy_name = dbname_ + "-copy";
|
||||
Checkpoint* checkpoint;
|
||||
ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
|
||||
ASSERT_OK(checkpoint->CreateCheckpoint(db_copy_name));
|
||||
delete checkpoint;
|
||||
|
||||
Close();
|
||||
Destroy(options);
|
||||
dbname_ = db_copy_name;
|
||||
Reopen(options);
|
||||
|
||||
ASSERT_EQ(Get("key1"), std::string("abc"));
|
||||
verify_stats();
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
||||
Close();
|
||||
}
|
||||
}
|
||||
|
||||
class DBBlockCachePinningTest
|
||||
: public DBTestBase,
|
||||
public testing::WithParamInterface<
|
||||
|
@ -1398,6 +1398,7 @@ void DeleteEntryCached(const Slice& /*key*/, void* value) {
|
||||
// Helper function to setup the cache key's prefix for the Table.
|
||||
void BlockBasedTableBuilder::SetupCacheKeyPrefix(
|
||||
const TableBuilderOptions& tbo) {
|
||||
// FIXME: Unify with BlockBasedTable::SetupCacheKeyPrefix
|
||||
if (rep_->table_options.block_cache.get() != nullptr) {
|
||||
BlockBasedTable::GenerateCachePrefix<Cache, FSWritableFile>(
|
||||
rep_->table_options.block_cache.get(), rep_->file->writable_file(),
|
||||
|
@ -57,6 +57,7 @@
|
||||
#include "table/meta_blocks.h"
|
||||
#include "table/multiget_context.h"
|
||||
#include "table/persistent_cache_helper.h"
|
||||
#include "table/persistent_cache_options.h"
|
||||
#include "table/sst_file_writer_collectors.h"
|
||||
#include "table/two_level_iterator.h"
|
||||
#include "test_util/sync_point.h"
|
||||
@ -371,7 +372,7 @@ Cache::Handle* BlockBasedTable::GetEntryFromCache(
|
||||
// Helper function to setup the cache key's prefix for the Table.
|
||||
void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep,
|
||||
const std::string& db_session_id,
|
||||
uint64_t cur_file_num) {
|
||||
uint64_t file_num) {
|
||||
assert(kMaxCacheKeyPrefixSize >= 10);
|
||||
rep->cache_key_prefix_size = 0;
|
||||
rep->compressed_cache_key_prefix_size = 0;
|
||||
@ -379,19 +380,28 @@ void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep,
|
||||
GenerateCachePrefix<Cache, FSRandomAccessFile>(
|
||||
rep->table_options.block_cache.get(), rep->file->file(),
|
||||
&rep->cache_key_prefix[0], &rep->cache_key_prefix_size, db_session_id,
|
||||
cur_file_num);
|
||||
}
|
||||
if (rep->table_options.persistent_cache != nullptr) {
|
||||
GenerateCachePrefix<PersistentCache, FSRandomAccessFile>(
|
||||
rep->table_options.persistent_cache.get(), rep->file->file(),
|
||||
&rep->persistent_cache_key_prefix[0],
|
||||
&rep->persistent_cache_key_prefix_size, "", cur_file_num);
|
||||
file_num);
|
||||
}
|
||||
if (rep->table_options.block_cache_compressed != nullptr) {
|
||||
GenerateCachePrefix<Cache, FSRandomAccessFile>(
|
||||
rep->table_options.block_cache_compressed.get(), rep->file->file(),
|
||||
&rep->compressed_cache_key_prefix[0],
|
||||
&rep->compressed_cache_key_prefix_size, "", cur_file_num);
|
||||
&rep->compressed_cache_key_prefix_size, db_session_id, file_num);
|
||||
}
|
||||
if (rep->table_options.persistent_cache != nullptr) {
|
||||
char persistent_cache_key_prefix[kMaxCacheKeyPrefixSize];
|
||||
size_t persistent_cache_key_prefix_size = 0;
|
||||
|
||||
GenerateCachePrefix<PersistentCache, FSRandomAccessFile>(
|
||||
rep->table_options.persistent_cache.get(), rep->file->file(),
|
||||
&persistent_cache_key_prefix[0], &persistent_cache_key_prefix_size,
|
||||
db_session_id, file_num);
|
||||
|
||||
rep->persistent_cache_options =
|
||||
PersistentCacheOptions(rep->table_options.persistent_cache,
|
||||
std::string(persistent_cache_key_prefix,
|
||||
persistent_cache_key_prefix_size),
|
||||
rep->ioptions.stats);
|
||||
}
|
||||
}
|
||||
|
||||
@ -513,7 +523,7 @@ Status BlockBasedTable::Open(
|
||||
const SequenceNumber largest_seqno, const bool force_direct_prefetch,
|
||||
TailPrefetchStats* tail_prefetch_stats,
|
||||
BlockCacheTracer* const block_cache_tracer,
|
||||
size_t max_file_size_for_l0_meta_pin, const std::string& db_session_id,
|
||||
size_t max_file_size_for_l0_meta_pin, const std::string& cur_db_session_id,
|
||||
uint64_t cur_file_num) {
|
||||
table_reader->reset();
|
||||
|
||||
@ -588,16 +598,11 @@ Status BlockBasedTable::Open(
|
||||
rep->internal_prefix_transform.reset(
|
||||
new InternalKeySliceTransform(prefix_extractor));
|
||||
}
|
||||
SetupCacheKeyPrefix(rep, db_session_id, cur_file_num);
|
||||
std::unique_ptr<BlockBasedTable> new_table(
|
||||
new BlockBasedTable(rep, block_cache_tracer));
|
||||
|
||||
// page cache options
|
||||
rep->persistent_cache_options =
|
||||
PersistentCacheOptions(rep->table_options.persistent_cache,
|
||||
std::string(rep->persistent_cache_key_prefix,
|
||||
rep->persistent_cache_key_prefix_size),
|
||||
rep->ioptions.stats);
|
||||
// For fully portable/stable cache keys, we need to read the properties
|
||||
// block before setting up cache keys. TODO: consider setting up a bootstrap
|
||||
// cache key for PersistentCache to use for metaindex and properties blocks.
|
||||
rep->persistent_cache_options = PersistentCacheOptions();
|
||||
|
||||
// Meta-blocks are not dictionary compressed. Explicitly set the dictionary
|
||||
// handle to null, otherwise it may be seen as uninitialized during the below
|
||||
@ -605,6 +610,8 @@ Status BlockBasedTable::Open(
|
||||
rep->compression_dict_handle = BlockHandle::NullBlockHandle();
|
||||
|
||||
// Read metaindex
|
||||
std::unique_ptr<BlockBasedTable> new_table(
|
||||
new BlockBasedTable(rep, block_cache_tracer));
|
||||
std::unique_ptr<Block> metaindex;
|
||||
std::unique_ptr<InternalIterator> metaindex_iter;
|
||||
s = new_table->ReadMetaIndexBlock(ro, prefetch_buffer.get(), &metaindex,
|
||||
@ -620,6 +627,36 @@ Status BlockBasedTable::Open(
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
// With properties loaded, we can set up portable/stable cache keys if
|
||||
// necessary info is available
|
||||
std::string db_session_id = cur_db_session_id;
|
||||
uint64_t file_num = cur_file_num;
|
||||
if (rep->table_properties && !rep->table_properties->db_session_id.empty()) {
|
||||
const auto& uprops = rep->table_properties->user_collected_properties;
|
||||
auto version_iter = uprops.find(ExternalSstFilePropertyNames::kVersion);
|
||||
if (version_iter == uprops.end()) {
|
||||
// Normal (non-external) SST file - can only use embedded db_session_id
|
||||
// with current file number (which should be original file number)
|
||||
if (file_num > 0) {
|
||||
db_session_id = rep->table_properties->db_session_id;
|
||||
}
|
||||
} else {
|
||||
// External (ingested) SST file - should not use current file number
|
||||
// (which is changed from original), so that same file ingested into
|
||||
// different DBs can share block cache entries. Although they can modify
|
||||
// the embedded global_seqno, that information is not currently cached
|
||||
// under these portable/stable keys.
|
||||
// Note: For now, each external SST file gets its own unique session id,
|
||||
// so we can use a fixed file number under than session id.
|
||||
// ... except FIXME (peterd): sst_file_writer currently uses wrong
|
||||
// format for db_session_ids so this approach doesn't work yet.
|
||||
db_session_id = rep->table_properties->db_session_id;
|
||||
file_num = 1;
|
||||
}
|
||||
}
|
||||
SetupCacheKeyPrefix(rep, db_session_id, file_num);
|
||||
|
||||
s = new_table->ReadRangeDelBlock(ro, prefetch_buffer.get(),
|
||||
metaindex_iter.get(), internal_comparator,
|
||||
&lookup_context);
|
||||
|
@ -9,6 +9,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "db/range_tombstone_fragmenter.h"
|
||||
#include "file/filename.h"
|
||||
#include "table/block_based/block_based_table_factory.h"
|
||||
@ -19,7 +21,6 @@
|
||||
#include "table/table_properties_internal.h"
|
||||
#include "table/table_reader.h"
|
||||
#include "table/two_level_iterator.h"
|
||||
|
||||
#include "trace_replay/block_cache_tracer.h"
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
@ -100,7 +101,7 @@ class BlockBasedTable : public TableReader {
|
||||
TailPrefetchStats* tail_prefetch_stats = nullptr,
|
||||
BlockCacheTracer* const block_cache_tracer = nullptr,
|
||||
size_t max_file_size_for_l0_meta_pin = 0,
|
||||
const std::string& db_session_id = "",
|
||||
const std::string& cur_db_session_id = "",
|
||||
uint64_t cur_file_num = 0);
|
||||
|
||||
bool PrefixMayMatch(const Slice& internal_key,
|
||||
@ -555,11 +556,11 @@ struct BlockBasedTable::Rep {
|
||||
Status status;
|
||||
std::unique_ptr<RandomAccessFileReader> file;
|
||||
char cache_key_prefix[kMaxCacheKeyPrefixSize];
|
||||
size_t cache_key_prefix_size = 0;
|
||||
char persistent_cache_key_prefix[kMaxCacheKeyPrefixSize];
|
||||
size_t persistent_cache_key_prefix_size = 0;
|
||||
// SIZE_MAX -> assert not used without re-assignment
|
||||
size_t cache_key_prefix_size = SIZE_MAX;
|
||||
char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize];
|
||||
size_t compressed_cache_key_prefix_size = 0;
|
||||
// SIZE_MAX -> assert not used without re-assignment
|
||||
size_t compressed_cache_key_prefix_size = SIZE_MAX;
|
||||
PersistentCacheOptions persistent_cache_options;
|
||||
|
||||
// Footer contains the fixed table information
|
||||
|
@ -228,6 +228,8 @@ Status ReadProperties(const ReadOptions& read_options,
|
||||
|
||||
BlockContents block_contents;
|
||||
Status s;
|
||||
// FIXME: should be a parameter for reading table properties to use persistent
|
||||
// cache
|
||||
PersistentCacheOptions cache_options;
|
||||
ReadOptions ro = read_options;
|
||||
ro.verify_checksums = verify_checksum;
|
||||
|
Loading…
Reference in New Issue
Block a user