detect logical sector size
Summary: Query the logical sector size from the device instead of hardcoding it on Linux platforms. Closes https://github.com/facebook/rocksdb/pull/1875 Differential Revision: D4591946 Pulled By: ajkr fbshipit-source-id: 4e9805c
This commit is contained in:
parent
ed50308d20
commit
1ef5f50e84
@ -690,17 +690,17 @@ class IoctlFriendlyTmpdir {
|
||||
std::string dir_;
|
||||
};
|
||||
|
||||
TEST_P(EnvPosixTestWithParam, PositionedAppend) {
|
||||
if (direct_io_ && env_ == Env::Default()) {
|
||||
|
||||
TEST_F(EnvPosixTest, PositionedAppend) {
|
||||
unique_ptr<WritableFile> writable_file;
|
||||
|
||||
EnvOptions options;
|
||||
options.use_direct_writes = direct_io_;
|
||||
options.use_direct_writes = true;
|
||||
options.use_mmap_writes = false;
|
||||
IoctlFriendlyTmpdir ift;
|
||||
ASSERT_OK(
|
||||
env_->NewWritableFile(ift.name() + "/f", &writable_file, options));
|
||||
ASSERT_OK(env_->NewWritableFile(ift.name() + "/f", &writable_file, options));
|
||||
|
||||
const size_t kBlockSize = 512;
|
||||
const size_t kBlockSize = 4096;
|
||||
const size_t kPageSize = 4096;
|
||||
const size_t kDataSize = kPageSize;
|
||||
// Write a page worth of 'a'
|
||||
@ -724,7 +724,6 @@ TEST_P(EnvPosixTestWithParam, PositionedAppend) {
|
||||
ASSERT_EQ('a', result[kBlockSize - 1]);
|
||||
ASSERT_EQ('b', result[kBlockSize]);
|
||||
}
|
||||
}
|
||||
|
||||
// Only works in linux platforms
|
||||
TEST_P(EnvPosixTestWithParam, RandomAccessUniqueID) {
|
||||
@ -1155,7 +1154,7 @@ TEST_P(EnvPosixTestWithParam, Preallocation) {
|
||||
ASSERT_EQ(last_allocated_block, 0UL);
|
||||
|
||||
// Small write should preallocate one block
|
||||
size_t kStrSize = 512;
|
||||
size_t kStrSize = 4096;
|
||||
auto data = NewAligned(kStrSize, 'A');
|
||||
Slice str(data.get(), kStrSize);
|
||||
srcfile->PrepareWrite(srcfile->GetFileSize(), kStrSize);
|
||||
@ -1212,7 +1211,7 @@ TEST_P(EnvPosixTestWithParam, ConsistentChildrenAttributes) {
|
||||
auto buf_ptr = NewAligned(data.size(), 'T');
|
||||
Slice buf(buf_ptr.get(), data.size());
|
||||
file->Append(buf);
|
||||
data.append(std::string(512, 'T'));
|
||||
data.append(std::string(4096, 'T'));
|
||||
}
|
||||
|
||||
std::vector<Env::FileAttributes> file_attrs;
|
||||
@ -1229,7 +1228,7 @@ TEST_P(EnvPosixTestWithParam, ConsistentChildrenAttributes) {
|
||||
ASSERT_TRUE(file_attrs_iter != file_attrs.end());
|
||||
uint64_t size;
|
||||
ASSERT_OK(env_->GetFileSize(path, &size));
|
||||
ASSERT_EQ(size, 512 * i);
|
||||
ASSERT_EQ(size, 4096 * i);
|
||||
ASSERT_EQ(size, file_attrs_iter->size_bytes);
|
||||
}
|
||||
rocksdb::SyncPoint::GetInstance()->ClearTrace();
|
||||
|
@ -8,7 +8,6 @@
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifdef ROCKSDB_LIB_IO_POSIX
|
||||
|
||||
#include "util/io_posix.h"
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
@ -47,20 +46,84 @@ int Fadvise(int fd, off_t offset, size_t len, int advice) {
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace {
|
||||
size_t GetLogicalBufferSize(int __attribute__((__unused__)) fd) {
|
||||
#ifdef OS_LINUX
|
||||
struct stat buf;
|
||||
int result = fstat(fd, &buf);
|
||||
if (result == -1) {
|
||||
return kDefaultPageSize;
|
||||
}
|
||||
if (major(buf.st_dev) == 0) {
|
||||
// Unnamed devices (e.g. non-device mounts), reserved as null device number.
|
||||
// These don't have an entry in /sys/dev/block/. Return a sensible default.
|
||||
return kDefaultPageSize;
|
||||
}
|
||||
|
||||
// Reading queue/logical_block_size does not require special permissions.
|
||||
const int kBufferSize = 100;
|
||||
char path[kBufferSize];
|
||||
char real_path[PATH_MAX + 1];
|
||||
snprintf(path, kBufferSize, "/sys/dev/block/%u:%u", major(buf.st_dev),
|
||||
minor(buf.st_dev));
|
||||
if (realpath(path, real_path) == nullptr) {
|
||||
return kDefaultPageSize;
|
||||
}
|
||||
std::string device_dir(real_path);
|
||||
if (!device_dir.empty() && device_dir.back() == '/') {
|
||||
device_dir.pop_back();
|
||||
}
|
||||
// NOTE: sda3 does not have a `queue/` subdir, only the parent sda has it.
|
||||
// $ ls -al '/sys/dev/block/8:3'
|
||||
// lrwxrwxrwx. 1 root root 0 Jun 26 01:38 /sys/dev/block/8:3 ->
|
||||
// ../../block/sda/sda3
|
||||
size_t parent_end = device_dir.rfind('/', device_dir.length() - 1);
|
||||
if (parent_end == std::string::npos) {
|
||||
return kDefaultPageSize;
|
||||
}
|
||||
size_t parent_begin = device_dir.rfind('/', parent_end - 1);
|
||||
if (parent_begin == std::string::npos) {
|
||||
return kDefaultPageSize;
|
||||
}
|
||||
if (device_dir.substr(parent_begin + 1, parent_end - parent_begin - 1) !=
|
||||
"block") {
|
||||
device_dir = device_dir.substr(0, parent_end);
|
||||
}
|
||||
std::string fname = device_dir + "/queue/logical_block_size";
|
||||
FILE* fp;
|
||||
size_t size = 0;
|
||||
fp = fopen(fname.c_str(), "r");
|
||||
if (fp != nullptr) {
|
||||
char* line = nullptr;
|
||||
size_t len = 0;
|
||||
if (getline(&line, &len, fp) != -1) {
|
||||
sscanf(line, "%zu", &size);
|
||||
}
|
||||
free(line);
|
||||
fclose(fp);
|
||||
}
|
||||
if (size != 0 && (size & (size - 1)) == 0) {
|
||||
return size;
|
||||
}
|
||||
#endif
|
||||
return kDefaultPageSize;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/*
|
||||
* DirectIOHelper
|
||||
*/
|
||||
#ifndef NDEBUG
|
||||
namespace {
|
||||
const size_t kSectorSize = 512;
|
||||
#ifdef OS_LINUX
|
||||
const size_t kPageSize = sysconf(_SC_PAGESIZE);
|
||||
#else
|
||||
const size_t kPageSize = 4 * 1024;
|
||||
#endif
|
||||
|
||||
|
||||
// Returns true when `off` is an exact multiple of kSectorSize.
bool IsSectorAligned(const size_t off) { return off % kSectorSize == 0; }
|
||||
// Returns true when `off` is an exact multiple of `sector_size`.
//
// A `sector_size` of zero is treated as "not aligned" rather than evaluated,
// because `off % 0` is undefined behaviour; the assertions that use this
// helper then fail cleanly instead of trapping.
bool IsSectorAligned(const size_t off, size_t sector_size) {
  if (sector_size == 0) {
    return false;
  }
  return off % sector_size == 0;
}
|
||||
|
||||
static bool IsPageAligned(const void* ptr) {
|
||||
return uintptr_t(ptr) % (kPageSize) == 0;
|
||||
@ -77,7 +140,8 @@ PosixSequentialFile::PosixSequentialFile(const std::string& fname, FILE* file,
|
||||
: filename_(fname),
|
||||
file_(file),
|
||||
fd_(fd),
|
||||
use_direct_io_(options.use_direct_reads) {
|
||||
use_direct_io_(options.use_direct_reads),
|
||||
logical_sector_size_(GetLogicalBufferSize(fd_)) {
|
||||
assert(!options.use_direct_reads || !options.use_mmap_reads);
|
||||
}
|
||||
|
||||
@ -230,7 +294,10 @@ size_t PosixHelper::GetUniqueIdFromFile(int fd, char* id, size_t max_size) {
|
||||
*/
|
||||
PosixRandomAccessFile::PosixRandomAccessFile(const std::string& fname, int fd,
|
||||
const EnvOptions& options)
|
||||
: filename_(fname), fd_(fd), use_direct_io_(options.use_direct_reads) {
|
||||
: filename_(fname),
|
||||
fd_(fd),
|
||||
use_direct_io_(options.use_direct_reads),
|
||||
logical_sector_size_(GetLogicalBufferSize(fd_)) {
|
||||
assert(!options.use_direct_reads || !options.use_mmap_reads);
|
||||
assert(!options.use_mmap_reads || sizeof(void*) < 8);
|
||||
}
|
||||
@ -601,7 +668,8 @@ PosixWritableFile::PosixWritableFile(const std::string& fname, int fd,
|
||||
: filename_(fname),
|
||||
use_direct_io_(options.use_direct_writes),
|
||||
fd_(fd),
|
||||
filesize_(0) {
|
||||
filesize_(0),
|
||||
logical_sector_size_(GetLogicalBufferSize(fd_)) {
|
||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||
allow_fallocate_ = options.allow_fallocate;
|
||||
fallocate_with_keep_size_ = options.fallocate_with_keep_size;
|
||||
@ -616,7 +684,9 @@ PosixWritableFile::~PosixWritableFile() {
|
||||
}
|
||||
|
||||
Status PosixWritableFile::Append(const Slice& data) {
|
||||
assert(!use_direct_io() || (IsSectorAligned(data.size()) && IsPageAligned(data.data())));
|
||||
assert(!use_direct_io() ||
|
||||
(IsSectorAligned(data.size(), GetRequiredBufferAlignment()) &&
|
||||
IsPageAligned(data.data())));
|
||||
const char* src = data.data();
|
||||
size_t left = data.size();
|
||||
while (left != 0) {
|
||||
@ -635,8 +705,10 @@ Status PosixWritableFile::Append(const Slice& data) {
|
||||
}
|
||||
|
||||
Status PosixWritableFile::PositionedAppend(const Slice& data, uint64_t offset) {
|
||||
assert(use_direct_io() && IsSectorAligned(offset) &&
|
||||
IsSectorAligned(data.size()) && IsPageAligned(data.data()));
|
||||
assert(use_direct_io() &&
|
||||
IsSectorAligned(offset, GetRequiredBufferAlignment()) &&
|
||||
IsSectorAligned(data.size(), GetRequiredBufferAlignment()) &&
|
||||
IsPageAligned(data.data()));
|
||||
assert(offset <= std::numeric_limits<off_t>::max());
|
||||
const char* src = data.data();
|
||||
size_t left = data.size();
|
||||
|
@ -47,6 +47,7 @@ class PosixSequentialFile : public SequentialFile {
|
||||
FILE* file_;
|
||||
int fd_;
|
||||
bool use_direct_io_;
|
||||
size_t logical_sector_size_;
|
||||
|
||||
public:
|
||||
PosixSequentialFile(const std::string& fname, FILE* file, int fd,
|
||||
@ -59,6 +60,9 @@ class PosixSequentialFile : public SequentialFile {
|
||||
virtual Status Skip(uint64_t n) override;
|
||||
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
||||
virtual bool use_direct_io() const override { return use_direct_io_; }
|
||||
// Returns logical_sector_size_, which the constructor initializes from
// GetLogicalBufferSize(fd_).
virtual size_t GetRequiredBufferAlignment() const override {
|
||||
return logical_sector_size_;
|
||||
}
|
||||
};
|
||||
|
||||
class PosixRandomAccessFile : public RandomAccessFile {
|
||||
@ -66,6 +70,7 @@ class PosixRandomAccessFile : public RandomAccessFile {
|
||||
std::string filename_;
|
||||
int fd_;
|
||||
bool use_direct_io_;
|
||||
size_t logical_sector_size_;
|
||||
|
||||
public:
|
||||
PosixRandomAccessFile(const std::string& fname, int fd,
|
||||
@ -80,6 +85,9 @@ class PosixRandomAccessFile : public RandomAccessFile {
|
||||
virtual void Hint(AccessPattern pattern) override;
|
||||
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
||||
virtual bool use_direct_io() const override { return use_direct_io_; }
|
||||
// Returns logical_sector_size_, which the constructor initializes from
// GetLogicalBufferSize(fd_).
virtual size_t GetRequiredBufferAlignment() const override {
|
||||
return logical_sector_size_;
|
||||
}
|
||||
};
|
||||
|
||||
class PosixWritableFile : public WritableFile {
|
||||
@ -88,6 +96,7 @@ class PosixWritableFile : public WritableFile {
|
||||
const bool use_direct_io_;
|
||||
int fd_;
|
||||
uint64_t filesize_;
|
||||
size_t logical_sector_size_;
|
||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||
bool allow_fallocate_;
|
||||
bool fallocate_with_keep_size_;
|
||||
@ -110,12 +119,10 @@ class PosixWritableFile : public WritableFile {
|
||||
virtual bool IsSyncThreadSafe() const override;
|
||||
virtual bool use_direct_io() const override { return use_direct_io_; }
|
||||
virtual uint64_t GetFileSize() override;
|
||||
virtual size_t GetRequiredBufferAlignment() const override {
|
||||
// TODO(gzh): It should be the logical sector size/filesystem block size
|
||||
// hardcoded as 4k for most cases
|
||||
return 4 * 1024;
|
||||
}
|
||||
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
||||
// Returns logical_sector_size_, which the constructor initializes from
// GetLogicalBufferSize(fd_). In direct I/O mode, Append() and
// PositionedAppend() assert that data sizes (and, for PositionedAppend, the
// offset) are multiples of this value.
virtual size_t GetRequiredBufferAlignment() const override {
|
||||
return logical_sector_size_;
|
||||
}
|
||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||
virtual Status Allocate(uint64_t offset, uint64_t len) override;
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user