Fix race condition caused by concurrent accesses to forceMmapOff_ when opening Posix WritableFile (#9685)
Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/9685 Our TSAN reports a race condition as follows when running test ``` gtest-parallel -r 100 ./external_sst_file_test --gtest_filter=ExternalSSTFileTest.MultiThreaded ``` leads to the following ``` WARNING: ThreadSanitizer: data race (pid=2683148) Write of size 1 at 0x556fede63340 by thread T7: #0 rocksdb::(anonymous namespace)::PosixFileSystem::OpenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::FileOptions const&, bool, std::unique_ptr<rocksdb::FSWritableFile, std::default_delete<rocksdb::FSWritableFile> >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:334 (external_sst_file_test+0xb61ac4) #1 rocksdb::(anonymous namespace)::PosixFileSystem::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::FileOptions const&, std::unique_ptr<rocksdb::FSWritableFile, std::default_delete<rocksdb::FSWritableFile> >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:382 (external_sst_file_test+0xb5ba96) #2 rocksdb::CompositeEnv::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unique_ptr<rocksdb::WritableFile, std::default_delete<rocksdb::WritableFile> >*, rocksdb::EnvOptions const&) internal_repo_rocksdb/repo/env/composite_env.cc:334 (external_sst_file_test+0xa6ab7f) #3 rocksdb::EnvWrapper::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unique_ptr<rocksdb::WritableFile, std::default_delete<rocksdb::WritableFile> >*, rocksdb::EnvOptions const&) internal_repo_rocksdb/repo/include/rocksdb/env.h:1428 (external_sst_file_test+0x561f3e) Previous read of size 1 at 0x556fede63340 by thread T4: #0 rocksdb::(anonymous namespace)::PosixFileSystem::OpenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, rocksdb::FileOptions const&, bool, std::unique_ptr<rocksdb::FSWritableFile, std::default_delete<rocksdb::FSWritableFile> >*, rocksdb::IODebugContext*) internal_repo_rocksdb/repo/env/fs_posix.cc:328 (external_sst_file_test+0xb61a70) #1 rocksdb::(anonymous namespace)::PosixFileSystem::ReopenWritableFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator ... ``` Fix by making sure the following block gets executed only once: ``` if (!checkedDiskForMmap_) { // this will be executed once in the program's lifetime. // do not use mmapWrite on non ext-3/xfs/tmpfs systems. if (!SupportsFastAllocate(fname)) { forceMmapOff_ = true; } checkedDiskForMmap_ = true; } ``` Reviewed By: pdillinger Differential Revision: D34780308 fbshipit-source-id: b761f66b24c8b5b8389d86ea371c8542b8d869d5
This commit is contained in:
parent
f0fca81fc6
commit
3bdbf67e1a
@ -20,6 +20,7 @@
|
|||||||
* Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() performed.
|
* Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() performed.
|
||||||
* Fixed a race condition when disable and re-enable manual compaction.
|
* Fixed a race condition when disable and re-enable manual compaction.
|
||||||
* Fixed automatic error recovery failure in atomic flush.
|
* Fixed automatic error recovery failure in atomic flush.
|
||||||
|
* Fixed a race condition when mmaping a WritableFile on POSIX.
|
||||||
|
|
||||||
### Public API changes
|
### Public API changes
|
||||||
* Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect.
|
* Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect.
|
||||||
|
45
env/fs_posix.cc
vendored
45
env/fs_posix.cc
vendored
@ -325,14 +325,7 @@ class PosixFileSystem : public FileSystem {
|
|||||||
SetFD_CLOEXEC(fd, &options);
|
SetFD_CLOEXEC(fd, &options);
|
||||||
|
|
||||||
if (options.use_mmap_writes) {
|
if (options.use_mmap_writes) {
|
||||||
if (!checkedDiskForMmap_) {
|
MaybeForceDisableMmap(fd);
|
||||||
// this will be executed once in the program's lifetime.
|
|
||||||
// do not use mmapWrite on non ext-3/xfs/tmpfs systems.
|
|
||||||
if (!SupportsFastAllocate(fname)) {
|
|
||||||
forceMmapOff_ = true;
|
|
||||||
}
|
|
||||||
checkedDiskForMmap_ = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (options.use_mmap_writes && !forceMmapOff_) {
|
if (options.use_mmap_writes && !forceMmapOff_) {
|
||||||
result->reset(new PosixMmapFile(fname, fd, page_size_, options));
|
result->reset(new PosixMmapFile(fname, fd, page_size_, options));
|
||||||
@ -431,14 +424,7 @@ class PosixFileSystem : public FileSystem {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (options.use_mmap_writes) {
|
if (options.use_mmap_writes) {
|
||||||
if (!checkedDiskForMmap_) {
|
MaybeForceDisableMmap(fd);
|
||||||
// this will be executed once in the program's lifetime.
|
|
||||||
// do not use mmapWrite on non ext-3/xfs/tmpfs systems.
|
|
||||||
if (!SupportsFastAllocate(fname)) {
|
|
||||||
forceMmapOff_ = true;
|
|
||||||
}
|
|
||||||
checkedDiskForMmap_ = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (options.use_mmap_writes && !forceMmapOff_) {
|
if (options.use_mmap_writes && !forceMmapOff_) {
|
||||||
result->reset(new PosixMmapFile(fname, fd, page_size_, options));
|
result->reset(new PosixMmapFile(fname, fd, page_size_, options));
|
||||||
@ -999,8 +985,7 @@ class PosixFileSystem : public FileSystem {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
private:
|
private:
|
||||||
bool checkedDiskForMmap_;
|
bool forceMmapOff_ = false; // do we override Env options?
|
||||||
bool forceMmapOff_; // do we override Env options?
|
|
||||||
|
|
||||||
// Returns true iff the named directory exists and is a directory.
|
// Returns true iff the named directory exists and is a directory.
|
||||||
virtual bool DirExists(const std::string& dname) {
|
virtual bool DirExists(const std::string& dname) {
|
||||||
@ -1011,10 +996,10 @@ class PosixFileSystem : public FileSystem {
|
|||||||
return false; // stat() failed return false
|
return false; // stat() failed return false
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SupportsFastAllocate(const std::string& path) {
|
bool SupportsFastAllocate(int fd) {
|
||||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||||
struct statfs s;
|
struct statfs s;
|
||||||
if (statfs(path.c_str(), &s)) {
|
if (fstatfs(fd, &s)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
switch (s.f_type) {
|
switch (s.f_type) {
|
||||||
@ -1028,11 +1013,26 @@ class PosixFileSystem : public FileSystem {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
(void)path;
|
(void)fd;
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MaybeForceDisableMmap(int fd) {
|
||||||
|
static std::once_flag s_check_disk_for_mmap_once;
|
||||||
|
assert(this == FileSystem::Default().get());
|
||||||
|
std::call_once(
|
||||||
|
s_check_disk_for_mmap_once,
|
||||||
|
[this](int fdesc) {
|
||||||
|
// this will be executed once in the program's lifetime.
|
||||||
|
// do not use mmapWrite on non ext-3/xfs/tmpfs systems.
|
||||||
|
if (!SupportsFastAllocate(fdesc)) {
|
||||||
|
forceMmapOff_ = true;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
fd);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef ROCKSDB_IOURING_PRESENT
|
#ifdef ROCKSDB_IOURING_PRESENT
|
||||||
bool IsIOUringEnabled() {
|
bool IsIOUringEnabled() {
|
||||||
if (RocksDbIOUringEnable && RocksDbIOUringEnable()) {
|
if (RocksDbIOUringEnable && RocksDbIOUringEnable()) {
|
||||||
@ -1166,8 +1166,7 @@ size_t PosixFileSystem::GetLogicalBlockSizeForWriteIfNeeded(
|
|||||||
}
|
}
|
||||||
|
|
||||||
PosixFileSystem::PosixFileSystem()
|
PosixFileSystem::PosixFileSystem()
|
||||||
: checkedDiskForMmap_(false),
|
: forceMmapOff_(false),
|
||||||
forceMmapOff_(false),
|
|
||||||
page_size_(getpagesize()),
|
page_size_(getpagesize()),
|
||||||
allow_non_owner_access_(true) {
|
allow_non_owner_access_(true) {
|
||||||
#if defined(ROCKSDB_IOURING_PRESENT)
|
#if defined(ROCKSDB_IOURING_PRESENT)
|
||||||
|
Loading…
Reference in New Issue
Block a user