Stable cache keys on ingested SST files (#8669)
Summary: Extends https://github.com/facebook/rocksdb/issues/8659 to work for ingested external SST files, even the same file ingested into different DBs sharing a block cache. Note: These new cache keys are currently only enabled when FileSystem does not provide GetUniqueId. For now, they are typically larger, so slightly less efficient. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8669 Test Plan: Extended unit test Reviewed By: zhichao-cao Differential Revision: D30398532 Pulled By: pdillinger fbshipit-source-id: 1f13e2af4b8bfff5741953a69466e9589fbc23c7
This commit is contained in:
parent
2b367fa8cc
commit
b6269b078a
@ -18,6 +18,7 @@
|
|||||||
#include "rocksdb/statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
#include "rocksdb/table.h"
|
#include "rocksdb/table.h"
|
||||||
#include "util/compression.h"
|
#include "util/compression.h"
|
||||||
|
#include "util/defer.h"
|
||||||
#include "util/random.h"
|
#include "util/random.h"
|
||||||
#include "utilities/fault_injection_fs.h"
|
#include "utilities/fault_injection_fs.h"
|
||||||
|
|
||||||
@ -1310,7 +1311,7 @@ class StableCacheKeyTestFS : public FaultInjectionTestFS {
|
|||||||
SetFailGetUniqueId(true);
|
SetFailGetUniqueId(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~StableCacheKeyTestFS() {}
|
virtual ~StableCacheKeyTestFS() override {}
|
||||||
|
|
||||||
IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
|
IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
|
||||||
IODebugContext*) override {
|
IODebugContext*) override {
|
||||||
@ -1342,16 +1343,17 @@ TEST_F(DBBlockCacheTest, StableCacheKeys) {
|
|||||||
table_options.no_block_cache = true;
|
table_options.no_block_cache = true;
|
||||||
table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false);
|
table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false);
|
||||||
verify_stats = [&options] {
|
verify_stats = [&options] {
|
||||||
|
// One for ordinary SST file and one for external SST file
|
||||||
ASSERT_EQ(
|
ASSERT_EQ(
|
||||||
1, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD));
|
2, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD));
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
table_options.cache_index_and_filter_blocks = true;
|
table_options.cache_index_and_filter_blocks = true;
|
||||||
table_options.block_cache = NewLRUCache(1 << 25, 0, false);
|
table_options.block_cache = NewLRUCache(1 << 25, 0, false);
|
||||||
verify_stats = [&options] {
|
verify_stats = [&options] {
|
||||||
ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
|
ASSERT_EQ(2, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
|
||||||
ASSERT_EQ(1, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
|
ASSERT_EQ(2, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
|
||||||
ASSERT_EQ(1,
|
ASSERT_EQ(2,
|
||||||
options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
|
options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -1360,18 +1362,41 @@ TEST_F(DBBlockCacheTest, StableCacheKeys) {
|
|||||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
DestroyAndReopen(options);
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
|
// Ordinary SST file
|
||||||
ASSERT_OK(Put("key1", "abc"));
|
ASSERT_OK(Put("key1", "abc"));
|
||||||
std::string something_compressible(500U, 'x');
|
std::string something_compressible(500U, 'x');
|
||||||
ASSERT_OK(Put("key2", something_compressible));
|
ASSERT_OK(Put("key1a", something_compressible));
|
||||||
ASSERT_OK(Flush());
|
ASSERT_OK(Flush());
|
||||||
|
|
||||||
|
#ifndef ROCKSDB_LITE
|
||||||
|
// External SST file
|
||||||
|
std::string external = dbname_ + "/external.sst";
|
||||||
|
{
|
||||||
|
SstFileWriter sst_file_writer(EnvOptions(), options);
|
||||||
|
ASSERT_OK(sst_file_writer.Open(external));
|
||||||
|
ASSERT_OK(sst_file_writer.Put("key2", "abc"));
|
||||||
|
ASSERT_OK(sst_file_writer.Put("key2a", something_compressible));
|
||||||
|
ExternalSstFileInfo external_info;
|
||||||
|
ASSERT_OK(sst_file_writer.Finish(&external_info));
|
||||||
|
IngestExternalFileOptions ingest_opts;
|
||||||
|
ASSERT_OK(db_->IngestExternalFile({external}, ingest_opts));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// Another ordinary SST file
|
||||||
|
ASSERT_OK(Put("key2", "abc"));
|
||||||
|
ASSERT_OK(Put("key2a", something_compressible));
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
#endif
|
||||||
|
|
||||||
ASSERT_EQ(Get("key1"), std::string("abc"));
|
ASSERT_EQ(Get("key1"), std::string("abc"));
|
||||||
|
ASSERT_EQ(Get("key2"), std::string("abc"));
|
||||||
verify_stats();
|
verify_stats();
|
||||||
|
|
||||||
// Make sure we can cache hit after re-open
|
// Make sure we can cache hit after re-open
|
||||||
Reopen(options);
|
Reopen(options);
|
||||||
|
|
||||||
ASSERT_EQ(Get("key1"), std::string("abc"));
|
ASSERT_EQ(Get("key1"), std::string("abc"));
|
||||||
|
ASSERT_EQ(Get("key2"), std::string("abc"));
|
||||||
verify_stats();
|
verify_stats();
|
||||||
|
|
||||||
// Make sure we can cache hit even on a full copy of the DB. Using
|
// Make sure we can cache hit even on a full copy of the DB. Using
|
||||||
@ -1386,14 +1411,26 @@ TEST_F(DBBlockCacheTest, StableCacheKeys) {
|
|||||||
|
|
||||||
Close();
|
Close();
|
||||||
Destroy(options);
|
Destroy(options);
|
||||||
dbname_ = db_copy_name;
|
SaveAndRestore<std::string> save_dbname(&dbname_, db_copy_name);
|
||||||
Reopen(options);
|
Reopen(options);
|
||||||
|
|
||||||
ASSERT_EQ(Get("key1"), std::string("abc"));
|
ASSERT_EQ(Get("key1"), std::string("abc"));
|
||||||
|
ASSERT_EQ(Get("key2"), std::string("abc"));
|
||||||
|
verify_stats();
|
||||||
|
|
||||||
|
// And ensure that re-ingesting the same external file into a different DB
|
||||||
|
// uses same cache keys
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
|
IngestExternalFileOptions ingest_opts;
|
||||||
|
ASSERT_OK(db_->IngestExternalFile({external}, ingest_opts));
|
||||||
|
|
||||||
|
ASSERT_EQ(Get("key2"), std::string("abc"));
|
||||||
verify_stats();
|
verify_stats();
|
||||||
#endif // !ROCKSDB_LITE
|
#endif // !ROCKSDB_LITE
|
||||||
|
|
||||||
Close();
|
Close();
|
||||||
|
Destroy(options);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3945,10 +3945,10 @@ Status DBImpl::GetDbSessionId(std::string& session_id) const {
|
|||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBImpl::SetDbSessionId() {
|
std::string DBImpl::GenerateDbSessionId(Env* env) {
|
||||||
// GenerateUniqueId() generates an identifier that has a negligible
|
// GenerateUniqueId() generates an identifier that has a negligible
|
||||||
// probability of being duplicated, ~128 bits of entropy
|
// probability of being duplicated, ~128 bits of entropy
|
||||||
std::string uuid = env_->GenerateUniqueId();
|
std::string uuid = env->GenerateUniqueId();
|
||||||
|
|
||||||
// Hash and reformat that down to a more compact format, 20 characters
|
// Hash and reformat that down to a more compact format, 20 characters
|
||||||
// in base-36 ([0-9A-Z]), which is ~103 bits of entropy, which is enough
|
// in base-36 ([0-9A-Z]), which is ~103 bits of entropy, which is enough
|
||||||
@ -3959,15 +3959,21 @@ void DBImpl::SetDbSessionId() {
|
|||||||
// * Visually distinct from DB id format
|
// * Visually distinct from DB id format
|
||||||
uint64_t a = NPHash64(uuid.data(), uuid.size(), 1234U);
|
uint64_t a = NPHash64(uuid.data(), uuid.size(), 1234U);
|
||||||
uint64_t b = NPHash64(uuid.data(), uuid.size(), 5678U);
|
uint64_t b = NPHash64(uuid.data(), uuid.size(), 5678U);
|
||||||
db_session_id_.resize(20);
|
std::string db_session_id;
|
||||||
|
db_session_id.resize(20);
|
||||||
static const char* const base36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
static const char* const base36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
for (; i < 10U; ++i, a /= 36U) {
|
for (; i < 10U; ++i, a /= 36U) {
|
||||||
db_session_id_[i] = base36[a % 36];
|
db_session_id[i] = base36[a % 36];
|
||||||
}
|
}
|
||||||
for (; i < 20U; ++i, b /= 36U) {
|
for (; i < 20U; ++i, b /= 36U) {
|
||||||
db_session_id_[i] = base36[b % 36];
|
db_session_id[i] = base36[b % 36];
|
||||||
}
|
}
|
||||||
|
return db_session_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DBImpl::SetDbSessionId() {
|
||||||
|
db_session_id_ = GenerateDbSessionId(env_);
|
||||||
TEST_SYNC_POINT_CALLBACK("DBImpl::SetDbSessionId", &db_session_id_);
|
TEST_SYNC_POINT_CALLBACK("DBImpl::SetDbSessionId", &db_session_id_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1114,6 +1114,8 @@ class DBImpl : public DB {
|
|||||||
State state_;
|
State state_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static std::string GenerateDbSessionId(Env* env);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const std::string dbname_;
|
const std::string dbname_;
|
||||||
std::string db_id_;
|
std::string db_id_;
|
||||||
|
20
db/repair.cc
20
db/repair.cc
@ -93,6 +93,7 @@ class Repairer {
|
|||||||
const ColumnFamilyOptions& default_cf_opts,
|
const ColumnFamilyOptions& default_cf_opts,
|
||||||
const ColumnFamilyOptions& unknown_cf_opts, bool create_unknown_cfs)
|
const ColumnFamilyOptions& unknown_cf_opts, bool create_unknown_cfs)
|
||||||
: dbname_(dbname),
|
: dbname_(dbname),
|
||||||
|
db_session_id_(DBImpl::GenerateDbSessionId(db_options.env)),
|
||||||
env_(db_options.env),
|
env_(db_options.env),
|
||||||
file_options_(),
|
file_options_(),
|
||||||
db_options_(SanitizeOptions(dbname_, db_options)),
|
db_options_(SanitizeOptions(dbname_, db_options)),
|
||||||
@ -109,21 +110,16 @@ class Repairer {
|
|||||||
// TableCache can be small since we expect each table to be opened
|
// TableCache can be small since we expect each table to be opened
|
||||||
// once.
|
// once.
|
||||||
NewLRUCache(10, db_options_.table_cache_numshardbits)),
|
NewLRUCache(10, db_options_.table_cache_numshardbits)),
|
||||||
table_cache_(
|
table_cache_(new TableCache(default_iopts_, &file_options_,
|
||||||
// TODO: db_session_id for TableCache should be initialized after
|
raw_table_cache_.get(),
|
||||||
// db_session_id_ is set.
|
/*block_cache_tracer=*/nullptr,
|
||||||
new TableCache(default_iopts_, &file_options_,
|
/*io_tracer=*/nullptr, db_session_id_)),
|
||||||
raw_table_cache_.get(),
|
|
||||||
/*block_cache_tracer=*/nullptr,
|
|
||||||
/*io_tracer=*/nullptr, /*db_session_id*/ "")),
|
|
||||||
wb_(db_options_.db_write_buffer_size),
|
wb_(db_options_.db_write_buffer_size),
|
||||||
wc_(db_options_.delayed_write_rate),
|
wc_(db_options_.delayed_write_rate),
|
||||||
// TODO: db_session_id for VersionSet should be initialized after
|
|
||||||
// db_session_id_ is set and use it for initialization.
|
|
||||||
vset_(dbname_, &immutable_db_options_, file_options_,
|
vset_(dbname_, &immutable_db_options_, file_options_,
|
||||||
raw_table_cache_.get(), &wb_, &wc_,
|
raw_table_cache_.get(), &wb_, &wc_,
|
||||||
/*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
|
/*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
|
||||||
/*db_session_id*/ ""),
|
db_session_id_),
|
||||||
next_file_number_(1),
|
next_file_number_(1),
|
||||||
db_lock_(nullptr),
|
db_lock_(nullptr),
|
||||||
closed_(false) {
|
closed_(false) {
|
||||||
@ -198,10 +194,6 @@ class Repairer {
|
|||||||
}
|
}
|
||||||
// Just create a DBImpl temporarily so we can reuse NewDB()
|
// Just create a DBImpl temporarily so we can reuse NewDB()
|
||||||
db_impl = new DBImpl(db_options_, dbname_);
|
db_impl = new DBImpl(db_options_, dbname_);
|
||||||
// Also use this temp DBImpl to get a session id
|
|
||||||
status = db_impl->GetDbSessionId(db_session_id_);
|
|
||||||
}
|
|
||||||
if (status.ok()) {
|
|
||||||
status = db_impl->NewDB(/*new_filenames=*/nullptr);
|
status = db_impl->NewDB(/*new_filenames=*/nullptr);
|
||||||
}
|
}
|
||||||
delete db_impl;
|
delete db_impl;
|
||||||
|
@ -649,8 +649,6 @@ Status BlockBasedTable::Open(
|
|||||||
// under these portable/stable keys.
|
// under these portable/stable keys.
|
||||||
// Note: For now, each external SST file gets its own unique session id,
|
// Note: For now, each external SST file gets its own unique session id,
|
||||||
// so we can use a fixed file number under than session id.
|
// so we can use a fixed file number under than session id.
|
||||||
// ... except FIXME (peterd): sst_file_writer currently uses wrong
|
|
||||||
// format for db_session_ids so this approach doesn't work yet.
|
|
||||||
db_session_id = rep->table_properties->db_session_id;
|
db_session_id = rep->table_properties->db_session_id;
|
||||||
file_num = 1;
|
file_num = 1;
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "db/db_impl/db_impl.h"
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "file/writable_file_writer.h"
|
#include "file/writable_file_writer.h"
|
||||||
#include "rocksdb/file_system.h"
|
#include "rocksdb/file_system.h"
|
||||||
@ -245,7 +246,7 @@ Status SstFileWriter::Open(const std::string& file_path) {
|
|||||||
// Here we mimic the way db_session_id behaves by resetting the db_session_id
|
// Here we mimic the way db_session_id behaves by resetting the db_session_id
|
||||||
// every time SstFileWriter is used, and in this case db_id is set to be "SST
|
// every time SstFileWriter is used, and in this case db_id is set to be "SST
|
||||||
// Writer".
|
// Writer".
|
||||||
std::string db_session_id = r->ioptions.env->GenerateUniqueId();
|
std::string db_session_id = DBImpl::GenerateDbSessionId(r->ioptions.env);
|
||||||
if (!db_session_id.empty() && db_session_id.back() == '\n') {
|
if (!db_session_id.empty() && db_session_id.back() == '\n') {
|
||||||
db_session_id.pop_back();
|
db_session_id.pop_back();
|
||||||
}
|
}
|
||||||
|
@ -57,6 +57,14 @@ class SaveAndRestore {
|
|||||||
public:
|
public:
|
||||||
// obj is non-null pointer to value to be saved and later restored.
|
// obj is non-null pointer to value to be saved and later restored.
|
||||||
explicit SaveAndRestore(T* obj) : obj_(obj), saved_(*obj) {}
|
explicit SaveAndRestore(T* obj) : obj_(obj), saved_(*obj) {}
|
||||||
|
// new_value is stored in *obj
|
||||||
|
SaveAndRestore(T* obj, const T& new_value)
|
||||||
|
: obj_(obj), saved_(std::move(*obj)) {
|
||||||
|
*obj = new_value;
|
||||||
|
}
|
||||||
|
SaveAndRestore(T* obj, T&& new_value) : obj_(obj), saved_(std::move(*obj)) {
|
||||||
|
*obj = std::move(new_value);
|
||||||
|
}
|
||||||
~SaveAndRestore() { *obj_ = std::move(saved_); }
|
~SaveAndRestore() { *obj_ = std::move(saved_); }
|
||||||
|
|
||||||
// No copies
|
// No copies
|
||||||
|
Loading…
Reference in New Issue
Block a user