use atomic O_CLOEXEC when available (#4328)
Summary: In our application we spawn helper child processes concurrently with opening rocksdb. In one situation I observed that the child process had inherited the rocksdb lock file as well as directory handles to the rocksdb storage location. The code in env_posix takes care to set CLOEXEC but doesn't use `O_CLOEXEC` at the time that the files are opened which means that there is a window of opportunity to leak the descriptors across a fork/exec boundary. This diff introduces a helper that can conditionally set the `O_CLOEXEC` bit for the open call using the same logic as that in the existing helper for setting that flag post-open. I've preserved the post-open logic for systems that don't have `O_CLOEXEC`. I've introduced setting `O_CLOEXEC` for what appears to be a number of temporary or transient files and directory handles; I suspect that none of the files opened by Rocks are intended to be inherited by a forked child process. In one case, `fopen` is used to open a file. I've added the use of the glibc-specific `e` mode to turn on `O_CLOEXEC` for this case. While this doesn't cover all posix systems, it is an improvement for our common deployment system. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4328 Reviewed By: ajkr Differential Revision: D9553046 Pulled By: wez fbshipit-source-id: acdb89f7a85ca649b22fe3c3bd76f82142bec2bf
This commit is contained in:
parent
927f274939
commit
d00e5de7fc
45
env/env_posix.cc
vendored
45
env/env_posix.cc
vendored
@ -102,6 +102,18 @@ class PosixFileLock : public FileLock {
|
|||||||
std::string filename;
|
std::string filename;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
int cloexec_flags(int flags, const EnvOptions* options) {
|
||||||
|
// If the system supports opening the file with cloexec enabled,
|
||||||
|
// do so, as this avoids a race condition if a db is opened around
|
||||||
|
// the same time that a child process is forked
|
||||||
|
#ifdef O_CLOEXEC
|
||||||
|
if (options == nullptr || options->set_fd_cloexec) {
|
||||||
|
flags |= O_CLOEXEC;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return flags;
|
||||||
|
}
|
||||||
|
|
||||||
class PosixEnv : public Env {
|
class PosixEnv : public Env {
|
||||||
public:
|
public:
|
||||||
PosixEnv();
|
PosixEnv();
|
||||||
@ -133,7 +145,7 @@ class PosixEnv : public Env {
|
|||||||
const EnvOptions& options) override {
|
const EnvOptions& options) override {
|
||||||
result->reset();
|
result->reset();
|
||||||
int fd = -1;
|
int fd = -1;
|
||||||
int flags = O_RDONLY;
|
int flags = cloexec_flags(O_RDONLY, &options);
|
||||||
FILE* file = nullptr;
|
FILE* file = nullptr;
|
||||||
|
|
||||||
if (options.use_direct_reads && !options.use_mmap_reads) {
|
if (options.use_direct_reads && !options.use_mmap_reads) {
|
||||||
@ -184,7 +196,8 @@ class PosixEnv : public Env {
|
|||||||
result->reset();
|
result->reset();
|
||||||
Status s;
|
Status s;
|
||||||
int fd;
|
int fd;
|
||||||
int flags = O_RDONLY;
|
int flags = cloexec_flags(O_RDONLY, &options);
|
||||||
|
|
||||||
if (options.use_direct_reads && !options.use_mmap_reads) {
|
if (options.use_direct_reads && !options.use_mmap_reads) {
|
||||||
#ifdef ROCKSDB_LITE
|
#ifdef ROCKSDB_LITE
|
||||||
return Status::IOError(fname, "Direct I/O not supported in RocksDB lite");
|
return Status::IOError(fname, "Direct I/O not supported in RocksDB lite");
|
||||||
@ -266,6 +279,8 @@ class PosixEnv : public Env {
|
|||||||
flags |= O_WRONLY;
|
flags |= O_WRONLY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
flags = cloexec_flags(flags, &options);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
IOSTATS_TIMER_GUARD(open_nanos);
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_));
|
fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_));
|
||||||
@ -354,6 +369,8 @@ class PosixEnv : public Env {
|
|||||||
flags |= O_WRONLY;
|
flags |= O_WRONLY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
flags = cloexec_flags(flags, &options);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
IOSTATS_TIMER_GUARD(open_nanos);
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
fd = open(old_fname.c_str(), flags,
|
fd = open(old_fname.c_str(), flags,
|
||||||
@ -415,9 +432,12 @@ class PosixEnv : public Env {
|
|||||||
unique_ptr<RandomRWFile>* result,
|
unique_ptr<RandomRWFile>* result,
|
||||||
const EnvOptions& options) override {
|
const EnvOptions& options) override {
|
||||||
int fd = -1;
|
int fd = -1;
|
||||||
|
int flags = cloexec_flags(O_RDWR, &options);
|
||||||
|
|
||||||
while (fd < 0) {
|
while (fd < 0) {
|
||||||
IOSTATS_TIMER_GUARD(open_nanos);
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
fd = open(fname.c_str(), O_RDWR, GetDBFileMode(allow_non_owner_access_));
|
|
||||||
|
fd = open(fname.c_str(), flags, GetDBFileMode(allow_non_owner_access_));
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
// Error while opening the file
|
// Error while opening the file
|
||||||
if (errno == EINTR) {
|
if (errno == EINTR) {
|
||||||
@ -437,9 +457,11 @@ class PosixEnv : public Env {
|
|||||||
unique_ptr<MemoryMappedFileBuffer>* result) override {
|
unique_ptr<MemoryMappedFileBuffer>* result) override {
|
||||||
int fd = -1;
|
int fd = -1;
|
||||||
Status status;
|
Status status;
|
||||||
|
int flags = cloexec_flags(O_RDWR, nullptr);
|
||||||
|
|
||||||
while (fd < 0) {
|
while (fd < 0) {
|
||||||
IOSTATS_TIMER_GUARD(open_nanos);
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
fd = open(fname.c_str(), O_RDWR, 0644);
|
fd = open(fname.c_str(), flags, 0644);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
// Error while opening the file
|
// Error while opening the file
|
||||||
if (errno == EINTR) {
|
if (errno == EINTR) {
|
||||||
@ -477,9 +499,10 @@ class PosixEnv : public Env {
|
|||||||
unique_ptr<Directory>* result) override {
|
unique_ptr<Directory>* result) override {
|
||||||
result->reset();
|
result->reset();
|
||||||
int fd;
|
int fd;
|
||||||
|
int flags = cloexec_flags(0, nullptr);
|
||||||
{
|
{
|
||||||
IOSTATS_TIMER_GUARD(open_nanos);
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
fd = open(name.c_str(), 0);
|
fd = open(name.c_str(), flags);
|
||||||
}
|
}
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
return IOError("While open directory", name, errno);
|
return IOError("While open directory", name, errno);
|
||||||
@ -663,9 +686,11 @@ class PosixEnv : public Env {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int fd;
|
int fd;
|
||||||
|
int flags = cloexec_flags(O_RDWR | O_CREAT, nullptr);
|
||||||
|
|
||||||
{
|
{
|
||||||
IOSTATS_TIMER_GUARD(open_nanos);
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
|
fd = open(fname.c_str(), flags, 0644);
|
||||||
}
|
}
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
result = IOError("while open a file for lock", fname, errno);
|
result = IOError("while open a file for lock", fname, errno);
|
||||||
@ -756,7 +781,13 @@ class PosixEnv : public Env {
|
|||||||
FILE* f;
|
FILE* f;
|
||||||
{
|
{
|
||||||
IOSTATS_TIMER_GUARD(open_nanos);
|
IOSTATS_TIMER_GUARD(open_nanos);
|
||||||
f = fopen(fname.c_str(), "w");
|
f = fopen(fname.c_str(), "w"
|
||||||
|
#ifdef __GLIBC_PREREQ
|
||||||
|
#if __GLIBC_PREREQ(2, 7)
|
||||||
|
"e" // glibc extension to enable O_CLOEXEC
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
);
|
||||||
}
|
}
|
||||||
if (f == nullptr) {
|
if (f == nullptr) {
|
||||||
result->reset();
|
result->reset();
|
||||||
|
Loading…
Reference in New Issue
Block a user