Make DB::Close() thread-safe (#8970)

Summary:
If `DB::Close()` is called in multi-thread env, the resource
could be double released, which causes exception or assert.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8970

Test Plan:
Test with multi-thread benchmark, with each thread try to
close the DB at the end.

Reviewed By: pdillinger

Differential Revision: D31242042

Pulled By: jay-zhuang

fbshipit-source-id: a61276b1b61e07732e375554106946aea86a23eb
This commit is contained in:
Jay Zhuang 2021-10-18 20:31:32 -07:00 committed by Facebook GitHub Bot
parent 86cf7266c3
commit 314de7e7de
5 changed files with 48 additions and 18 deletions

View File

@ -8,6 +8,7 @@
* Fixed bug in calls to `IngestExternalFiles()` with files for multiple column families. The bug could have introduced a delay in ingested file keys becoming visible after `IngestExternalFiles()` returned. Furthermore, mutations to ingested file keys while they were invisible could have been dropped (not necessarily immediately).
* Fixed a possible race condition impacting users of `WriteBufferManager` who constructed it with `allow_stall == true`. The race condition led to undefined behavior (in our experience, typically a process crash).
* Fixed a bug where stalled writes would remain stalled forever after the user calls `WriteBufferManager::SetBufferSize()` with `new_size == 0` to dynamically disable memory limiting.
* Make `DB::close()` thread-safe.
### New Features
* Print information about blob files when using "ldb list_live_files_metadata"
@ -24,6 +25,7 @@
* Made FileChecksumGenFactory, SstPartitionerFactory, TablePropertiesCollectorFactory, and WalFilter extend the Customizable class and added a CreateFromString method.
* Some fields of SstFileMetaData are deprecated for compatibility with new base class FileStorageInfo.
* Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of the this batch of files.
* If `DB::Close()` failed with a non aborted status, calling `DB::Close()` again will return the original status instead of Status::OK.
## 6.25.0 (2021-09-20)
### Bug Fixes

View File

@ -1130,9 +1130,15 @@ TEST_F(DBBasicTest, DBCloseFlushError) {
ASSERT_OK(Put("key3", "value3"));
fault_injection_env->SetFilesystemActive(false);
Status s = dbfull()->Close();
fault_injection_env->SetFilesystemActive(true);
ASSERT_NE(s, Status::OK());
// retry should return the same error
s = dbfull()->Close();
ASSERT_NE(s, Status::OK());
fault_injection_env->SetFilesystemActive(true);
// retry close() is no-op even the system is back. Could be improved if
// Close() is retry-able: #9029
s = dbfull()->Close();
ASSERT_NE(s, Status::OK());
Destroy(options);
}
@ -2591,6 +2597,21 @@ TEST_F(DBBasicTest, ManifestChecksumMismatch) {
ASSERT_TRUE(s.IsCorruption());
}
TEST_F(DBBasicTest, ConcurrentlyCloseDB) {
Options options = CurrentOptions();
DestroyAndReopen(options);
std::vector<std::thread> workers;
for (int i = 0; i < 10; i++) {
workers.push_back(std::thread([&]() {
auto s = db_->Close();
ASSERT_OK(s);
}));
}
for (auto& w : workers) {
w.join();
}
}
#ifndef ROCKSDB_LITE
class DBBasicTestTrackWal : public DBTestBase,
public testing::WithParamInterface<bool> {

View File

@ -718,9 +718,11 @@ Status DBImpl::CloseHelper() {
Status DBImpl::CloseImpl() { return CloseHelper(); }
DBImpl::~DBImpl() {
InstrumentedMutexLock closing_lock_guard(&closing_mutex_);
if (!closed_) {
closed_ = true;
CloseHelper().PermitUncheckedError();
closing_status_ = CloseHelper();
closing_status_.PermitUncheckedError();
}
}
@ -4006,7 +4008,10 @@ Status DB::DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family) {
DB::~DB() {}
Status DBImpl::Close() {
if (!closed_) {
InstrumentedMutexLock closing_lock_guard(&closing_mutex_);
if (closed_) {
return closing_status_;
}
{
InstrumentedMutexLock l(&mutex_);
// If there is unreleased snapshot, fail the close call
@ -4014,11 +4019,9 @@ Status DBImpl::Close() {
return Status::Aborted("Cannot close DB with unreleased snapshot.");
}
}
closing_status_ = CloseImpl();
closed_ = true;
return CloseImpl();
}
return Status::OK();
return closing_status_;
}
Status DB::ListColumnFamilies(const DBOptions& db_options,

View File

@ -2290,6 +2290,10 @@ class DBImpl : public DB {
// Flag to check whether Close() has been called on this DB
bool closed_;
// save the closing status, for re-calling the close()
Status closing_status_;
// mutex for DB::Close()
InstrumentedMutex closing_mutex_;
// Conditional variable to coordinate installation of atomic flush results.
// With atomic flush, each bg thread installs the result of flushing multiple

View File

@ -285,9 +285,9 @@ class DB {
// If the return status is Aborted(), closing fails because there is
// unreleased snapshot in the system. In this case, users can release
// the unreleased snapshots and try again and expect it to succeed. For
// other status, recalling Close() will be no-op.
// If the return status is NotSupported(), then the DB implementation does
// cleanup in the destructor
// other status, re-calling Close() will be no-op and return the original
// close status. If the return status is NotSupported(), then the DB
// implementation does cleanup in the destructor
virtual Status Close() { return Status::NotSupported(); }
// ListColumnFamilies will open the DB specified by argument name