Use fcntl(F_FULLFSYNC) on OS X (#9356)
Summary:
Closing https://github.com/facebook/rocksdb/issues/5954
fsync/fdatasync on Linux:
```
(fsync/fdatasync) includes writing through or flushing a disk cache if present.
```
However, on OS X and iOS:
```
(fsync) will flush all data from the host to the drive (i.e. the "permanent storage device"),
the drive itself may not physically write the data to the platters for quite some time and it
may be written in an out-of-order sequence.
```
Solution is to use `fcntl(F_FULLFSYNC)` on OS X so that we get the same
persistence guarantee.
According to OSX man page,
```
The F_FULLFSYNC fcntl asks the drive to flush **all** buffered data to permanent storage.
```
This suggests that it will be no faster than `fsync` on Linux, since Linux, according to its man page,
```
writing through or flushing a disk cache if present
```
It means Linux may not flush **all** data from disk cache.
This is similar to bug reports/fixes in:
- golang: https://github.com/golang/go/issues/26650
- leveldb: 296de8d5b8
.
Not sure if we should fallback to fsync since we break persistence contract.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9356
Reviewed By: jay-zhuang
Differential Revision: D33417416
Pulled By: riversand963
fbshipit-source-id: 475548ff9c5eaccde325e0f6842694271cbc8cb7
This commit is contained in:
parent
5576ded762
commit
1a8e9f0e07
@ -616,6 +616,11 @@ if(HAVE_AUXV_GETAUXVAL)
|
||||
add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT)
|
||||
endif()
|
||||
|
||||
check_cxx_symbol_exists(F_FULLFSYNC "fcntl.h" HAVE_FULLFSYNC)
|
||||
if(HAVE_FULLFSYNC)
|
||||
add_definitions(-DHAVE_FULLFSYNC)
|
||||
endif()
|
||||
|
||||
include_directories(${PROJECT_SOURCE_DIR})
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
### Bug Fixes
|
||||
* Fix a bug that FlushMemTable may return ok even flush not succeed.
|
||||
* Fixed a bug of Sync() and Fsync() not using `fcntl(F_FULLFSYNC)` on OS X and iOS.
|
||||
|
||||
## 6.28.0 (2021-12-17)
|
||||
### New Features
|
||||
|
@ -842,7 +842,19 @@ EOF
|
||||
fi
|
||||
fi
|
||||
|
||||
rm -f test.o
|
||||
# check for F_FULLFSYNC
|
||||
$CXX $PLATFORM_CXXFALGS -x c++ - -o test.o 2>/dev/null <<EOF
|
||||
#include <fcntl.h>
|
||||
int main() {
|
||||
fcntl(0, F_FULLFSYNC);
|
||||
return 0;
|
||||
}
|
||||
EOF
|
||||
if [ "$?" = 0 ]; then
|
||||
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_FULLFSYNC"
|
||||
fi
|
||||
|
||||
rm -f test.o test_dl.o
|
||||
|
||||
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
|
||||
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
|
||||
|
47
env/io_posix.cc
vendored
47
env/io_posix.cc
vendored
@ -1108,9 +1108,15 @@ IOStatus PosixMmapFile::Flush(const IOOptions& /*opts*/,
|
||||
|
||||
IOStatus PosixMmapFile::Sync(const IOOptions& /*opts*/,
|
||||
IODebugContext* /*dbg*/) {
|
||||
#ifdef HAVE_FULLFSYNC
|
||||
if (::fcntl(fd_, F_FULLFSYNC) < 0) {
|
||||
return IOError("while fcntl(F_FULLSYNC) mmapped file", filename_, errno);
|
||||
}
|
||||
#else // HAVE_FULLFSYNC
|
||||
if (fdatasync(fd_) < 0) {
|
||||
return IOError("While fdatasync mmapped file", filename_, errno);
|
||||
}
|
||||
#endif // HAVE_FULLFSYNC
|
||||
|
||||
return Msync();
|
||||
}
|
||||
@ -1120,9 +1126,15 @@ IOStatus PosixMmapFile::Sync(const IOOptions& /*opts*/,
|
||||
*/
|
||||
IOStatus PosixMmapFile::Fsync(const IOOptions& /*opts*/,
|
||||
IODebugContext* /*dbg*/) {
|
||||
#ifdef HAVE_FULLFSYNC
|
||||
if (::fcntl(fd_, F_FULLFSYNC) < 0) {
|
||||
return IOError("While fcntl(F_FULLSYNC) on mmaped file", filename_, errno);
|
||||
}
|
||||
#else // HAVE_FULLFSYNC
|
||||
if (fsync(fd_) < 0) {
|
||||
return IOError("While fsync mmaped file", filename_, errno);
|
||||
}
|
||||
#endif // HAVE_FULLFSYNC
|
||||
|
||||
return Msync();
|
||||
}
|
||||
@ -1320,17 +1332,29 @@ IOStatus PosixWritableFile::Flush(const IOOptions& /*opts*/,
|
||||
|
||||
IOStatus PosixWritableFile::Sync(const IOOptions& /*opts*/,
|
||||
IODebugContext* /*dbg*/) {
|
||||
#ifdef HAVE_FULLFSYNC
|
||||
if (::fcntl(fd_, F_FULLFSYNC) < 0) {
|
||||
return IOError("while fcntl(F_FULLFSYNC)", filename_, errno);
|
||||
}
|
||||
#else // HAVE_FULLFSYNC
|
||||
if (fdatasync(fd_) < 0) {
|
||||
return IOError("While fdatasync", filename_, errno);
|
||||
}
|
||||
#endif // HAVE_FULLFSYNC
|
||||
return IOStatus::OK();
|
||||
}
|
||||
|
||||
IOStatus PosixWritableFile::Fsync(const IOOptions& /*opts*/,
|
||||
IODebugContext* /*dbg*/) {
|
||||
#ifdef HAVE_FULLFSYNC
|
||||
if (::fcntl(fd_, F_FULLFSYNC) < 0) {
|
||||
return IOError("while fcntl(F_FULLFSYNC)", filename_, errno);
|
||||
}
|
||||
#else // HAVE_FULLFSYNC
|
||||
if (fsync(fd_) < 0) {
|
||||
return IOError("While fsync", filename_, errno);
|
||||
}
|
||||
#endif // HAVE_FULLFSYNC
|
||||
return IOStatus::OK();
|
||||
}
|
||||
|
||||
@ -1503,17 +1527,29 @@ IOStatus PosixRandomRWFile::Flush(const IOOptions& /*opts*/,
|
||||
|
||||
IOStatus PosixRandomRWFile::Sync(const IOOptions& /*opts*/,
|
||||
IODebugContext* /*dbg*/) {
|
||||
#ifdef HAVE_FULLFSYNC
|
||||
if (::fcntl(fd_, F_FULLFSYNC) < 0) {
|
||||
return IOError("while fcntl(F_FULLFSYNC) random rw file", filename_, errno);
|
||||
}
|
||||
#else // HAVE_FULLFSYNC
|
||||
if (fdatasync(fd_) < 0) {
|
||||
return IOError("While fdatasync random read/write file", filename_, errno);
|
||||
}
|
||||
#endif // HAVE_FULLFSYNC
|
||||
return IOStatus::OK();
|
||||
}
|
||||
|
||||
IOStatus PosixRandomRWFile::Fsync(const IOOptions& /*opts*/,
|
||||
IODebugContext* /*dbg*/) {
|
||||
#ifdef HAVE_FULLFSYNC
|
||||
if (::fcntl(fd_, F_FULLFSYNC) < 0) {
|
||||
return IOError("While fcntl(F_FULLSYNC) random rw file", filename_, errno);
|
||||
}
|
||||
#else // HAVE_FULLFSYNC
|
||||
if (fsync(fd_) < 0) {
|
||||
return IOError("While fsync random read/write file", filename_, errno);
|
||||
}
|
||||
#endif // HAVE_FULLFSYNC
|
||||
return IOStatus::OK();
|
||||
}
|
||||
|
||||
@ -1585,10 +1621,19 @@ IOStatus PosixDirectory::FsyncWithDirOptions(
|
||||
}
|
||||
// fallback to dir-fsync for kDefault, kDirRenamed and kFileDeleted
|
||||
}
|
||||
#ifdef HAVE_FULLFSYNC
|
||||
// btrfs is a Linux file system, while currently F_FULLFSYNC is available on
|
||||
// Mac OS.
|
||||
assert(!is_btrfs_);
|
||||
if (::fcntl(fd_, F_FULLFSYNC) < 0) {
|
||||
return IOError("while fcntl(F_FULLFSYNC)", "a directory", errno);
|
||||
}
|
||||
#else // HAVE_FULLFSYNC
|
||||
if (fsync(fd_) == -1) {
|
||||
s = IOError("While fsync", "a directory", errno);
|
||||
}
|
||||
#endif
|
||||
#endif // HAVE_FULLFSYNC
|
||||
#endif // OS_AIX
|
||||
return s;
|
||||
}
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
Loading…
Reference in New Issue
Block a user