From 0a97125ec07f72c0dc0dd11d21327be070a03536 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Wed, 14 Aug 2019 16:07:03 -0700 Subject: [PATCH] Fix data races in BlobDB (#5698) Summary: Some accesses to blob_files_ and open_ttl_files_ in BlobDBImpl, as well as to expiration_range_ in BlobFile were not properly synchronized. The patch fixes this and also makes sure the invariant that obsolete_files_ is a subset of blob_files_ holds even when an attempt to delete an obsolete blob file fails. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5698 Test Plan: COMPILE_WITH_TSAN=1 make blob_db_test gtest-parallel --repeat=1000 ./blob_db_test --gtest_filter="*ShutdownWait*" The test fails with TSAN errors ~20 times out of 1000 without the patch but completes successfully 1000 out of 1000 times with the fix. Differential Revision: D16793235 Pulled By: ltamasi fbshipit-source-id: 8034b987598d4fdc9f15098d4589cc49cde484e9 --- utilities/blob_db/blob_db_impl.cc | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc index 86501280d..8088e4273 100644 --- a/utilities/blob_db/blob_db_impl.cc +++ b/utilities/blob_db/blob_db_impl.cc @@ -724,6 +724,7 @@ Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/, } if (s.ok()) { if (expiration != kNoExpiration) { + WriteLock file_lock(&blob_file->mutex_); blob_file->ExtendExpirationRange(expiration); } s = CloseBlobFileIfNeeded(blob_file); @@ -1177,6 +1178,8 @@ std::pair BlobDBImpl::SanityCheck(bool aborted) { return std::make_pair(false, -1); } + ReadLock rl(&mutex_); + ROCKS_LOG_INFO(db_options_.info_log, "Starting Sanity Check"); ROCKS_LOG_INFO(db_options_.info_log, "Number of files %" ROCKSDB_PRIszt, blob_files_.size()); @@ -1198,7 +1201,13 @@ std::pair BlobDBImpl::SanityCheck(bool aborted) { blob_file->BlobFileNumber(), blob_file->GetFileSize(), blob_file->BlobCount(), blob_file->Immutable()); if (blob_file->HasTTL()) { - auto expiration_range = blob_file->GetExpirationRange(); + ExpirationRange expiration_range; + + { + ReadLock file_lock(&blob_file->mutex_); + expiration_range = blob_file->GetExpirationRange(); + } + pos += snprintf(buf + pos, sizeof(buf) - pos, ", expiration range (%" PRIu64 ", %" PRIu64 ")", expiration_range.first, expiration_range.second); @@ -1501,7 +1510,14 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr& bfptr, // this reads the key but skips the blob Reader::ReadLevel shallow = Reader::kReadHeaderKey; - bool file_expired = has_ttl && now >= bfptr->GetExpirationRange().second; + ExpirationRange expiration_range; + + { + ReadLock file_lock(&bfptr->mutex_); + expiration_range = bfptr->GetExpirationRange(); + } + + bool file_expired = has_ttl && now >= expiration_range.second; if (!file_expired) { // read the blob because you have to write it back to new file @@ -1761,7 +1777,11 @@ std::pair BlobDBImpl::DeleteObsoleteFiles(bool aborted) { "Will delete file due to snapshot success %s", bfile->PathName().c_str()); - blob_files_.erase(bfile->BlobFileNumber()); + { + WriteLock wl(&mutex_); + blob_files_.erase(bfile->BlobFileNumber()); + } + Status s = DeleteDBFile(&(db_impl_->immutable_db_options()), bfile->PathName(), blob_dir_, true, /*force_fg=*/false); @@ -1794,6 +1814,7 @@ std::pair BlobDBImpl::DeleteObsoleteFiles(bool aborted) { if (!tobsolete.empty()) { WriteLock wl(&mutex_); for (auto bfile : tobsolete) { + blob_files_.insert(std::make_pair(bfile->BlobFileNumber(), bfile)); obsolete_files_.push_front(bfile); } }