Fix data races in BlobDB (#5698)

Summary:
Some accesses to blob_files_ and open_ttl_files_ in BlobDBImpl, as well
as to expiration_range_ in BlobFile were not properly synchronized.
The patch fixes this and also makes sure the invariant that obsolete_files_
is a subset of blob_files_ holds even when an attempt to delete an obsolete
blob file fails.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5698

Test Plan:
COMPILE_WITH_TSAN=1 make blob_db_test
gtest-parallel --repeat=1000 ./blob_db_test --gtest_filter="*ShutdownWait*"

The test fails with TSAN errors ~20 times out of 1000 without the patch but
completes successfully 1000 out of 1000 times with the fix.

Differential Revision: D16793235

Pulled By: ltamasi

fbshipit-source-id: 8034b987598d4fdc9f15098d4589cc49cde484e9
This commit is contained in:
Levi Tamasi 2019-08-14 16:07:03 -07:00 committed by Facebook Github Bot
parent 4c70cb7306
commit 0a97125ec0

View File

@ -724,6 +724,7 @@ Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/,
}
if (s.ok()) {
if (expiration != kNoExpiration) {
WriteLock file_lock(&blob_file->mutex_);
blob_file->ExtendExpirationRange(expiration);
}
s = CloseBlobFileIfNeeded(blob_file);
@ -1177,6 +1178,8 @@ std::pair<bool, int64_t> BlobDBImpl::SanityCheck(bool aborted) {
return std::make_pair(false, -1);
}
ReadLock rl(&mutex_);
ROCKS_LOG_INFO(db_options_.info_log, "Starting Sanity Check");
ROCKS_LOG_INFO(db_options_.info_log, "Number of files %" ROCKSDB_PRIszt,
blob_files_.size());
@ -1198,7 +1201,13 @@ std::pair<bool, int64_t> BlobDBImpl::SanityCheck(bool aborted) {
blob_file->BlobFileNumber(), blob_file->GetFileSize(),
blob_file->BlobCount(), blob_file->Immutable());
if (blob_file->HasTTL()) {
auto expiration_range = blob_file->GetExpirationRange();
ExpirationRange expiration_range;
{
ReadLock file_lock(&blob_file->mutex_);
expiration_range = blob_file->GetExpirationRange();
}
pos += snprintf(buf + pos, sizeof(buf) - pos,
", expiration range (%" PRIu64 ", %" PRIu64 ")",
expiration_range.first, expiration_range.second);
@ -1501,7 +1510,14 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
// this reads the key but skips the blob
Reader::ReadLevel shallow = Reader::kReadHeaderKey;
bool file_expired = has_ttl && now >= bfptr->GetExpirationRange().second;
ExpirationRange expiration_range;
{
ReadLock file_lock(&bfptr->mutex_);
expiration_range = bfptr->GetExpirationRange();
}
bool file_expired = has_ttl && now >= expiration_range.second;
if (!file_expired) {
// read the blob because you have to write it back to new file
@ -1761,7 +1777,11 @@ std::pair<bool, int64_t> BlobDBImpl::DeleteObsoleteFiles(bool aborted) {
"Will delete file due to snapshot success %s",
bfile->PathName().c_str());
blob_files_.erase(bfile->BlobFileNumber());
{
WriteLock wl(&mutex_);
blob_files_.erase(bfile->BlobFileNumber());
}
Status s = DeleteDBFile(&(db_impl_->immutable_db_options()),
bfile->PathName(), blob_dir_, true,
/*force_fg=*/false);
@ -1794,6 +1814,7 @@ std::pair<bool, int64_t> BlobDBImpl::DeleteObsoleteFiles(bool aborted) {
if (!tobsolete.empty()) {
WriteLock wl(&mutex_);
for (auto bfile : tobsolete) {
blob_files_.insert(std::make_pair(bfile->BlobFileNumber(), bfile));
obsolete_files_.push_front(bfile);
}
}