Making persistent cache more resilient to filesystem failures

Summary:
The persistent cache is designed to hop over errors and return key not found. So far, it has shown resilience to write errors, encoding errors, data corruption etc. It is not resilient against disappearing files/directories. This was exposed during testing when multiple instances of the persistent cache were started sharing the same directory, simulating an unpredictable filesystem environment.

This patch

- makes the write code path more resilient to errors while creating files
- makes the read code path more resilient to situations where files are not found
- adds a test that does negative write/read testing by removing the directory while writes are in progress
Closes https://github.com/facebook/rocksdb/pull/1472

Differential Revision: D4143413

Pulled By: kradhakrishnan

fbshipit-source-id: fd25e9b
This commit is contained in:
Karthikeyan Radhakrishnan 2016-11-22 10:26:08 -08:00 committed by Facebook Github Bot
parent 734e4acafb
commit 3068870cce
5 changed files with 103 additions and 21 deletions

View File

@ -11,6 +11,7 @@
#include <vector> #include <vector>
#include "util/stop_watch.h" #include "util/stop_watch.h"
#include "util/sync_point.h"
#include "utilities/persistent_cache/block_cache_tier_file.h" #include "utilities/persistent_cache/block_cache_tier_file.h"
namespace rocksdb { namespace rocksdb {
@ -54,8 +55,15 @@ Status BlockCacheTier::Open() {
} }
} }
// create a new file
assert(!cache_file_); assert(!cache_file_);
NewCacheFile(); status = NewCacheFile();
if (!status.ok()) {
Error(opt_.log, "Error creating new file %s. %s", opt_.path.c_str(),
status.ToString().c_str());
return status;
}
assert(cache_file_); assert(cache_file_);
if (opt_.pipeline_writes) { if (opt_.pipeline_writes) {
@ -231,7 +239,10 @@ Status BlockCacheTier::InsertImpl(const Slice& key, const Slice& data) {
} }
assert(cache_file_->Eof()); assert(cache_file_->Eof());
NewCacheFile(); Status status = NewCacheFile();
if (!status.ok()) {
return status;
}
} }
// Insert into lookup index // Insert into lookup index
@ -280,7 +291,6 @@ Status BlockCacheTier::Lookup(const Slice& key, unique_ptr<char[]>* val,
status = file->Read(lba, &blk_key, &blk_val, scratch.get()); status = file->Read(lba, &blk_key, &blk_val, scratch.get());
--file->refs_; --file->refs_;
assert(status);
if (!status) { if (!status) {
stats_.cache_misses_++; stats_.cache_misses_++;
stats_.cache_errors_++; stats_.cache_errors_++;
@ -309,25 +319,36 @@ bool BlockCacheTier::Erase(const Slice& key) {
return true; return true;
} }
void BlockCacheTier::NewCacheFile() { Status BlockCacheTier::NewCacheFile() {
lock_.AssertHeld(); lock_.AssertHeld();
Info(opt_.log, "Creating cache file %d", writer_cache_id_); TEST_SYNC_POINT_CALLBACK("BlockCacheTier::NewCacheFile:DeleteDir",
(void*)(GetCachePath().c_str()));
std::unique_ptr<WriteableCacheFile> f(
new WriteableCacheFile(opt_.env, &buffer_allocator_, &writer_,
GetCachePath(), writer_cache_id_,
opt_.cache_file_size, opt_.log));
bool status = f->Create(opt_.enable_direct_writes, opt_.enable_direct_reads);
if (!status) {
return Status::IOError("Error creating file");
}
Info(opt_.log, "Created cache file %d", writer_cache_id_);
writer_cache_id_++; writer_cache_id_++;
cache_file_ = f.release();
cache_file_ = new WriteableCacheFile(opt_.env, &buffer_allocator_, &writer_,
GetCachePath(), writer_cache_id_,
opt_.cache_file_size, opt_.log);
bool status;
status =
cache_file_->Create(opt_.enable_direct_writes, opt_.enable_direct_reads);
assert(status);
// insert to cache files tree // insert to cache files tree
status = metadata_.Insert(cache_file_); status = metadata_.Insert(cache_file_);
(void)status;
assert(status); assert(status);
if (!status) {
Error(opt_.log, "Error inserting to metadata");
return Status::IOError("Error inserting to metadata");
}
return Status::OK();
} }
bool BlockCacheTier::Reserve(const size_t size) { bool BlockCacheTier::Reserve(const size_t size) {

View File

@ -103,7 +103,7 @@ class BlockCacheTier : public PersistentCacheTier {
// insert implementation // insert implementation
Status InsertImpl(const Slice& key, const Slice& data); Status InsertImpl(const Slice& key, const Slice& data);
// Create a new cache file // Create a new cache file
void NewCacheFile(); Status NewCacheFile();
// Get cache directory path // Get cache directory path
std::string GetCachePath() const { return opt_.path + "/cache"; } std::string GetCachePath() const { return opt_.path + "/cache"; }
// Cleanup folder // Cleanup folder

View File

@ -216,7 +216,10 @@ bool RandomAccessCacheFile::Read(const LBA& lba, Slice* key, Slice* val,
ReadLock _(&rwlock_); ReadLock _(&rwlock_);
assert(lba.cache_id_ == cache_id_); assert(lba.cache_id_ == cache_id_);
assert(file_);
if (!file_) {
return false;
}
Slice result; Slice result;
Status s = file_->Read(lba.off_, lba.size_, &result, scratch); Status s = file_->Read(lba.off_, lba.size_, &result, scratch);
@ -259,11 +262,12 @@ WriteableCacheFile::~WriteableCacheFile() {
// This file never flushed. We give priority to shutdown since this is a // This file never flushed. We give priority to shutdown since this is a
// cache // cache
// TODO(krad): Figure a way to flush the pending data // TODO(krad): Figure a way to flush the pending data
assert(file_); if (file_) {
assert(refs_ == 1);
assert(refs_ == 1); --refs_;
--refs_; }
} }
assert(!refs_);
ClearBuffers(); ClearBuffers();
} }

View File

@ -38,6 +38,32 @@ static void OnOpenForWrite(void* arg) {
} }
#endif #endif
static void RemoveDirectory(const std::string& folder) {
std::vector<std::string> files;
Status status = Env::Default()->GetChildren(folder, &files);
if (!status.ok()) {
// we assume the directory does not exist
return;
}
// clean up files with the pattern :digi:.rc
for (auto file : files) {
if (file == "." || file == "..") {
continue;
}
status = Env::Default()->DeleteFile(folder + "/" + file);
assert(status.ok());
}
status = Env::Default()->DeleteDir(folder);
assert(status.ok());
}
static void OnDeleteDir(void* arg) {
char* dir = static_cast<char*>(arg);
RemoveDirectory(std::string(dir));
}
// //
// Simple logger that prints message on stdout // Simple logger that prints message on stdout
// //
@ -116,6 +142,21 @@ PersistentCacheTierTest::PersistentCacheTierTest()
#endif #endif
} }
// Block cache tests
TEST_F(PersistentCacheTierTest, BlockCacheInsertWithFileCreateError) {
cache_ = NewBlockCache(Env::Default(), path_,
/*size=*/std::numeric_limits<uint64_t>::max(),
/*direct_writes=*/ false);
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"BlockCacheTier::NewCacheFile:DeleteDir", OnDeleteDir);
RunNegativeInsertTest(/*nthreads=*/ 1,
/*max_keys*/
static_cast<size_t>(10 * 1024 * kStressFactor));
rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
}
#ifdef TRAVIS #ifdef TRAVIS
// Travis is unable to handle the normal version of the tests running out of // Travis is unable to handle the normal version of the tests running out of
// fds, out of space and timeouts. This is an easier version of the test // fds, out of space and timeouts. This is an easier version of the test

View File

@ -132,7 +132,12 @@ class PersistentCacheTierTest : public testing::Test {
memset(data, '0' + (i % 10), sizeof(data)); memset(data, '0' + (i % 10), sizeof(data));
auto k = prefix + PaddedNumber(i, /*count=*/8); auto k = prefix + PaddedNumber(i, /*count=*/8);
Slice key(k); Slice key(k);
while (!cache_->Insert(key, data, sizeof(data)).ok()) { while (true) {
Status status = cache_->Insert(key, data, sizeof(data));
if (status.ok()) {
break;
}
ASSERT_TRUE(status.IsTryAgain());
Env::Default()->SleepForMicroseconds(1 * 1000 * 1000); Env::Default()->SleepForMicroseconds(1 * 1000 * 1000);
} }
} }
@ -180,6 +185,17 @@ class PersistentCacheTierTest : public testing::Test {
cache_.reset(); cache_.reset();
} }
// template for negative insert test
void RunNegativeInsertTest(const size_t nthreads, const size_t max_keys) {
Insert(nthreads, max_keys);
Verify(nthreads, /*eviction_enabled=*/true);
ASSERT_LT(stats_verify_hits_, max_keys);
ASSERT_GT(stats_verify_missed_, 0);
cache_->Close();
cache_.reset();
}
// template for insert with eviction test // template for insert with eviction test
void RunInsertTestWithEviction(const size_t nthreads, const size_t max_keys) { void RunInsertTestWithEviction(const size_t nthreads, const size_t max_keys) {
Insert(nthreads, max_keys); Insert(nthreads, max_keys);