Fix race condition caused by concurrent accesses to forceMmapOff_ when opening Posix WritableFile (#9685)

Summary:
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9685

Our TSAN run reports a race condition when running the following test:
```
gtest-parallel -r 100 ./external_sst_file_test --gtest_filter=ExternalSSTFileTest.MultiThreaded
```
This leads to the following report:

```
WARNING: ThreadSanitizer: data race (pid=2683148)
  Write of size 1 at 0x556fede63340 by thread T7:
    #0 rocksdb::(anonymous namespace)::PosixFileSystem::OpenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::FileOptions const&, bool, std::unique_ptr<rocksdb::FSWritableFile, std::default_delete<rocksdb::FSWritableFile> >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:334 (external_sst_file_test+0xb61ac4)
    #1 rocksdb::(anonymous namespace)::PosixFileSystem::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::FileOptions const&, std::unique_ptr<rocksdb::FSWritableFile, std::default_delete<rocksdb::FSWritableFile> >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:382 (external_sst_file_test+0xb5ba96)
    #2 rocksdb::CompositeEnv::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unique_ptr<rocksdb::WritableFile, std::default_delete<rocksdb::WritableFile> >*, rocksdb::EnvOptions const&) internal_repo_rocksdb/repo/env/composite_env.cc:334 (external_sst_file_test+0xa6ab7f)
    #3 rocksdb::EnvWrapper::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unique_ptr<rocksdb::WritableFile, std::default_delete<rocksdb::WritableFile> >*, rocksdb::EnvOptions const&) internal_repo_rocksdb/repo/include/rocksdb/env.h:1428 (external_sst_file_test+0x561f3e)
Previous read of size 1 at 0x556fede63340 by thread T4:
    #0 rocksdb::(anonymous namespace)::PosixFileSystem::OpenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::FileOptions const&, bool, std::unique_ptr<rocksdb::FSWritableFile, std::default_delete<rocksdb::FSWritableFile> >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:328 (external_sst_file_test+0xb61a70)
    #1 rocksdb::(anonymous namespace)::PosixFileSystem::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator
...
```

Fix by making sure the following block gets executed only once:
```
      if (!checkedDiskForMmap_) {
        // this will be executed once in the program's lifetime.
        // do not use mmapWrite on non ext-3/xfs/tmpfs systems.
        if (!SupportsFastAllocate(fname)) {
          forceMmapOff_ = true;
        }
        checkedDiskForMmap_ = true;
      }
```

Reviewed By: pdillinger

Differential Revision: D34780308

fbshipit-source-id: b761f66b24c8b5b8389d86ea371c8542b8d869d5
This commit is contained in:
Yanqin Jin 2022-03-17 19:50:30 -07:00 committed by Facebook GitHub Bot
parent f0fca81fc6
commit 3bdbf67e1a
2 changed files with 23 additions and 23 deletions

View File

@ -20,6 +20,7 @@
* Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() performed. * Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() performed.
* Fixed a race condition when disable and re-enable manual compaction. * Fixed a race condition when disable and re-enable manual compaction.
* Fixed automatic error recovery failure in atomic flush. * Fixed automatic error recovery failure in atomic flush.
* Fixed a race condition when mmapping a WritableFile on POSIX.
### Public API changes ### Public API changes
* Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect. * Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect.

45
env/fs_posix.cc vendored
View File

@ -325,14 +325,7 @@ class PosixFileSystem : public FileSystem {
SetFD_CLOEXEC(fd, &options); SetFD_CLOEXEC(fd, &options);
if (options.use_mmap_writes) { if (options.use_mmap_writes) {
if (!checkedDiskForMmap_) { MaybeForceDisableMmap(fd);
// this will be executed once in the program's lifetime.
// do not use mmapWrite on non ext-3/xfs/tmpfs systems.
if (!SupportsFastAllocate(fname)) {
forceMmapOff_ = true;
}
checkedDiskForMmap_ = true;
}
} }
if (options.use_mmap_writes && !forceMmapOff_) { if (options.use_mmap_writes && !forceMmapOff_) {
result->reset(new PosixMmapFile(fname, fd, page_size_, options)); result->reset(new PosixMmapFile(fname, fd, page_size_, options));
@ -431,14 +424,7 @@ class PosixFileSystem : public FileSystem {
} }
if (options.use_mmap_writes) { if (options.use_mmap_writes) {
if (!checkedDiskForMmap_) { MaybeForceDisableMmap(fd);
// this will be executed once in the program's lifetime.
// do not use mmapWrite on non ext-3/xfs/tmpfs systems.
if (!SupportsFastAllocate(fname)) {
forceMmapOff_ = true;
}
checkedDiskForMmap_ = true;
}
} }
if (options.use_mmap_writes && !forceMmapOff_) { if (options.use_mmap_writes && !forceMmapOff_) {
result->reset(new PosixMmapFile(fname, fd, page_size_, options)); result->reset(new PosixMmapFile(fname, fd, page_size_, options));
@ -999,8 +985,7 @@ class PosixFileSystem : public FileSystem {
} }
#endif #endif
private: private:
bool checkedDiskForMmap_; bool forceMmapOff_ = false; // do we override Env options?
bool forceMmapOff_; // do we override Env options?
// Returns true iff the named directory exists and is a directory. // Returns true iff the named directory exists and is a directory.
virtual bool DirExists(const std::string& dname) { virtual bool DirExists(const std::string& dname) {
@ -1011,10 +996,10 @@ class PosixFileSystem : public FileSystem {
return false; // stat() failed return false return false; // stat() failed return false
} }
bool SupportsFastAllocate(const std::string& path) { bool SupportsFastAllocate(int fd) {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
struct statfs s; struct statfs s;
if (statfs(path.c_str(), &s)) { if (fstatfs(fd, &s)) {
return false; return false;
} }
switch (s.f_type) { switch (s.f_type) {
@ -1028,11 +1013,26 @@ class PosixFileSystem : public FileSystem {
return false; return false;
} }
#else #else
(void)path; (void)fd;
return false; return false;
#endif #endif
} }
void MaybeForceDisableMmap(int fd) {
static std::once_flag s_check_disk_for_mmap_once;
assert(this == FileSystem::Default().get());
std::call_once(
s_check_disk_for_mmap_once,
[this](int fdesc) {
// this will be executed once in the program's lifetime.
// do not use mmapWrite on non ext-3/xfs/tmpfs systems.
if (!SupportsFastAllocate(fdesc)) {
forceMmapOff_ = true;
}
},
fd);
}
#ifdef ROCKSDB_IOURING_PRESENT #ifdef ROCKSDB_IOURING_PRESENT
bool IsIOUringEnabled() { bool IsIOUringEnabled() {
if (RocksDbIOUringEnable && RocksDbIOUringEnable()) { if (RocksDbIOUringEnable && RocksDbIOUringEnable()) {
@ -1166,8 +1166,7 @@ size_t PosixFileSystem::GetLogicalBlockSizeForWriteIfNeeded(
} }
PosixFileSystem::PosixFileSystem() PosixFileSystem::PosixFileSystem()
: checkedDiskForMmap_(false), : forceMmapOff_(false),
forceMmapOff_(false),
page_size_(getpagesize()), page_size_(getpagesize()),
allow_non_owner_access_(true) { allow_non_owner_access_(true) {
#if defined(ROCKSDB_IOURING_PRESENT) #if defined(ROCKSDB_IOURING_PRESENT)