2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-16 01:03:42 +02:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2015-10-14 10:14:53 +02:00
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
2015-10-23 16:36:22 +02:00
|
|
|
#pragma once
|
2016-09-13 21:08:22 +02:00
|
|
|
#include <errno.h>
|
2015-10-14 10:14:53 +02:00
|
|
|
#include <unistd.h>
|
2016-04-21 19:37:27 +02:00
|
|
|
#include <atomic>
|
2016-09-13 21:08:22 +02:00
|
|
|
#include <string>
|
2015-10-14 10:14:53 +02:00
|
|
|
#include "rocksdb/env.h"
|
|
|
|
|
|
|
|
// Placeholder values for the POSIX_FADV_* advice constants. They are defined
// here only when none of OS_LINUX, CYGWIN or OS_AIX is defined, and are used
// purely as placeholders on those platforms.
#if !defined(OS_LINUX) && !defined(CYGWIN) && !defined(OS_AIX)
#define POSIX_FADV_NORMAL     0 /* [MC1] no further special treatment */
#define POSIX_FADV_RANDOM     1 /* [MC1] expect random page refs */
#define POSIX_FADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */
#define POSIX_FADV_WILLNEED   3 /* [MC1] will need these pages */
#define POSIX_FADV_DONTNEED   4 /* [MC1] dont need these pages */
#endif
|
|
|
|
|
|
|
|
namespace rocksdb {
|
2017-06-26 21:42:21 +02:00
|
|
|
// Builds the message text used when reporting an I/O error.
//
// Returns `context` unchanged when `file_name` is empty; otherwise returns
// "<context>: <file_name>".
static std::string IOErrorMsg(const std::string& context,
                              const std::string& file_name) {
  return file_name.empty() ? context : context + ": " + file_name;
}
|
2015-10-14 10:14:53 +02:00
|
|
|
|
2017-06-26 21:42:21 +02:00
|
|
|
// file_name can be left empty if it is unknown.
|
|
|
|
// Translates the errno value `err_number` into a Status.
//
//   ENOSPC    -> Status::NoSpace(IOErrorMsg(context, file_name),
//                                strerror(err_number))
//   ESTALE    -> Status::IOError(Status::kStaleFile); `context` and
//                `file_name` are not part of the result in this case
//   otherwise -> Status::IOError(IOErrorMsg(context, file_name),
//                                strerror(err_number))
static Status IOError(const std::string& context, const std::string& file_name,
                      int err_number) {
  if (err_number == ENOSPC) {
    return Status::NoSpace(IOErrorMsg(context, file_name),
                           strerror(err_number));
  }
  if (err_number == ESTALE) {
    return Status::IOError(Status::kStaleFile);
  }
  return Status::IOError(IOErrorMsg(context, file_name), strerror(err_number));
}
|
|
|
|
|
2016-04-21 19:37:27 +02:00
|
|
|
// Holder for static helper routines; only the declaration lives here.
class PosixHelper {
 public:
  // Takes a file descriptor `fd` and an output buffer `id` with room for
  // `max_size` bytes, and returns a size_t.
  // NOTE(review): presumably fills `id` with an identifier unique to the file
  // behind `fd` and returns the number of bytes written -- confirm against
  // the definition, which is not in this header.
  static size_t GetUniqueIdFromFile(int fd, char* id, size_t max_size);
};
|
|
|
|
|
2015-10-14 10:14:53 +02:00
|
|
|
class PosixSequentialFile : public SequentialFile {
|
|
|
|
private:
|
|
|
|
std::string filename_;
|
|
|
|
FILE* file_;
|
|
|
|
int fd_;
|
2016-12-22 21:51:29 +01:00
|
|
|
bool use_direct_io_;
|
2017-02-23 20:17:49 +01:00
|
|
|
size_t logical_sector_size_;
|
2015-10-14 10:14:53 +02:00
|
|
|
|
|
|
|
public:
|
2017-01-12 01:42:07 +01:00
|
|
|
PosixSequentialFile(const std::string& fname, FILE* file, int fd,
|
2015-10-14 10:14:53 +02:00
|
|
|
const EnvOptions& options);
|
|
|
|
virtual ~PosixSequentialFile();
|
|
|
|
|
|
|
|
virtual Status Read(size_t n, Slice* result, char* scratch) override;
|
2017-01-12 01:42:07 +01:00
|
|
|
virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
|
|
|
|
char* scratch) override;
|
2015-10-14 10:14:53 +02:00
|
|
|
virtual Status Skip(uint64_t n) override;
|
|
|
|
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
2017-01-13 21:01:08 +01:00
|
|
|
virtual bool use_direct_io() const override { return use_direct_io_; }
|
2017-02-23 20:17:49 +01:00
|
|
|
virtual size_t GetRequiredBufferAlignment() const override {
|
|
|
|
return logical_sector_size_;
|
|
|
|
}
|
2016-04-21 19:37:27 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
class PosixRandomAccessFile : public RandomAccessFile {
|
|
|
|
protected:
|
2015-10-14 10:14:53 +02:00
|
|
|
std::string filename_;
|
|
|
|
int fd_;
|
2016-12-22 21:51:29 +01:00
|
|
|
bool use_direct_io_;
|
2017-02-23 20:17:49 +01:00
|
|
|
size_t logical_sector_size_;
|
2015-10-14 10:14:53 +02:00
|
|
|
|
|
|
|
public:
|
|
|
|
PosixRandomAccessFile(const std::string& fname, int fd,
|
|
|
|
const EnvOptions& options);
|
|
|
|
virtual ~PosixRandomAccessFile();
|
|
|
|
|
|
|
|
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
|
|
|
char* scratch) const override;
|
2017-04-26 23:21:04 +02:00
|
|
|
|
|
|
|
virtual Status Prefetch(uint64_t offset, size_t n) override;
|
|
|
|
|
2017-04-22 05:41:37 +02:00
|
|
|
#if defined(OS_LINUX) || defined(OS_MACOSX) || defined(OS_AIX)
|
2015-10-14 10:14:53 +02:00
|
|
|
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
|
|
|
#endif
|
|
|
|
virtual void Hint(AccessPattern pattern) override;
|
|
|
|
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
2017-01-13 21:01:08 +01:00
|
|
|
virtual bool use_direct_io() const override { return use_direct_io_; }
|
2017-02-23 20:17:49 +01:00
|
|
|
virtual size_t GetRequiredBufferAlignment() const override {
|
|
|
|
return logical_sector_size_;
|
|
|
|
}
|
2016-04-21 19:37:27 +02:00
|
|
|
};
|
|
|
|
|
2015-10-14 10:14:53 +02:00
|
|
|
class PosixWritableFile : public WritableFile {
|
2016-04-21 19:37:27 +02:00
|
|
|
protected:
|
2015-10-14 10:14:53 +02:00
|
|
|
const std::string filename_;
|
2017-01-13 21:01:08 +01:00
|
|
|
const bool use_direct_io_;
|
2015-10-14 10:14:53 +02:00
|
|
|
int fd_;
|
|
|
|
uint64_t filesize_;
|
2017-02-23 20:17:49 +01:00
|
|
|
size_t logical_sector_size_;
|
2015-10-28 19:55:20 +01:00
|
|
|
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
2015-10-14 10:14:53 +02:00
|
|
|
bool allow_fallocate_;
|
|
|
|
bool fallocate_with_keep_size_;
|
2015-10-28 19:55:20 +01:00
|
|
|
#endif
|
2015-10-14 10:14:53 +02:00
|
|
|
|
|
|
|
public:
|
2016-04-21 19:37:27 +02:00
|
|
|
explicit PosixWritableFile(const std::string& fname, int fd,
|
|
|
|
const EnvOptions& options);
|
|
|
|
virtual ~PosixWritableFile();
|
2015-10-14 10:14:53 +02:00
|
|
|
|
2017-02-22 19:00:25 +01:00
|
|
|
// Need to implement this so the file is truncated correctly
|
|
|
|
// with direct I/O
|
|
|
|
virtual Status Truncate(uint64_t size) override;
|
2015-10-14 10:14:53 +02:00
|
|
|
virtual Status Close() override;
|
|
|
|
virtual Status Append(const Slice& data) override;
|
2016-11-19 02:06:37 +01:00
|
|
|
virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
|
2015-10-14 10:14:53 +02:00
|
|
|
virtual Status Flush() override;
|
|
|
|
virtual Status Sync() override;
|
|
|
|
virtual Status Fsync() override;
|
|
|
|
virtual bool IsSyncThreadSafe() const override;
|
2017-01-13 21:01:08 +01:00
|
|
|
virtual bool use_direct_io() const override { return use_direct_io_; }
|
2017-11-10 18:25:26 +01:00
|
|
|
virtual void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override;
|
2015-10-14 10:14:53 +02:00
|
|
|
virtual uint64_t GetFileSize() override;
|
2017-02-23 20:17:49 +01:00
|
|
|
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
2016-12-22 21:51:29 +01:00
|
|
|
virtual size_t GetRequiredBufferAlignment() const override {
|
2017-02-23 20:17:49 +01:00
|
|
|
return logical_sector_size_;
|
2016-12-22 21:51:29 +01:00
|
|
|
}
|
2015-10-28 19:55:20 +01:00
|
|
|
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
2015-11-11 02:03:42 +01:00
|
|
|
virtual Status Allocate(uint64_t offset, uint64_t len) override;
|
2017-02-01 19:19:47 +01:00
|
|
|
#endif
|
2017-04-22 05:41:37 +02:00
|
|
|
#ifdef ROCKSDB_RANGESYNC_PRESENT
|
2015-11-11 02:03:42 +01:00
|
|
|
virtual Status RangeSync(uint64_t offset, uint64_t nbytes) override;
|
2017-04-22 05:41:37 +02:00
|
|
|
#endif
|
|
|
|
#ifdef OS_LINUX
|
2015-10-14 10:14:53 +02:00
|
|
|
virtual size_t GetUniqueId(char* id, size_t max_size) const override;
|
2017-02-02 19:32:40 +01:00
|
|
|
#endif
|
2015-10-14 10:14:53 +02:00
|
|
|
};
|
|
|
|
|
2016-12-22 21:51:29 +01:00
|
|
|
// mmap() based random-access
|
2015-10-14 10:14:53 +02:00
|
|
|
class PosixMmapReadableFile : public RandomAccessFile {
|
|
|
|
private:
|
|
|
|
int fd_;
|
|
|
|
std::string filename_;
|
|
|
|
void* mmapped_region_;
|
|
|
|
size_t length_;
|
|
|
|
|
|
|
|
public:
|
|
|
|
PosixMmapReadableFile(const int fd, const std::string& fname, void* base,
|
|
|
|
size_t length, const EnvOptions& options);
|
|
|
|
virtual ~PosixMmapReadableFile();
|
|
|
|
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
|
|
|
char* scratch) const override;
|
|
|
|
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
|
|
|
};
|
|
|
|
|
|
|
|
class PosixMmapFile : public WritableFile {
|
|
|
|
private:
|
|
|
|
std::string filename_;
|
|
|
|
int fd_;
|
|
|
|
size_t page_size_;
|
|
|
|
size_t map_size_; // How much extra memory to map at a time
|
|
|
|
char* base_; // The mapped region
|
|
|
|
char* limit_; // Limit of the mapped region
|
|
|
|
char* dst_; // Where to write next (in range [base_,limit_])
|
|
|
|
char* last_sync_; // Where have we synced up to
|
|
|
|
uint64_t file_offset_; // Offset of base_ in file
|
2015-10-28 19:55:20 +01:00
|
|
|
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
2015-10-14 10:14:53 +02:00
|
|
|
bool allow_fallocate_; // If false, fallocate calls are bypassed
|
|
|
|
bool fallocate_with_keep_size_;
|
2015-10-28 19:55:20 +01:00
|
|
|
#endif
|
2015-10-14 10:14:53 +02:00
|
|
|
|
|
|
|
// Roundup x to a multiple of y
|
|
|
|
static size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; }
|
|
|
|
|
|
|
|
size_t TruncateToPageBoundary(size_t s) {
|
|
|
|
s -= (s & (page_size_ - 1));
|
|
|
|
assert((s % page_size_) == 0);
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status MapNewRegion();
|
|
|
|
Status UnmapCurrentRegion();
|
|
|
|
Status Msync();
|
|
|
|
|
|
|
|
public:
|
|
|
|
PosixMmapFile(const std::string& fname, int fd, size_t page_size,
|
|
|
|
const EnvOptions& options);
|
|
|
|
~PosixMmapFile();
|
|
|
|
|
|
|
|
// Means Close() will properly take care of truncate
|
|
|
|
// and it does not need any additional information
|
2018-03-05 22:08:17 +01:00
|
|
|
virtual Status Truncate(uint64_t /*size*/) override { return Status::OK(); }
|
2015-10-14 10:14:53 +02:00
|
|
|
virtual Status Close() override;
|
|
|
|
virtual Status Append(const Slice& data) override;
|
|
|
|
virtual Status Flush() override;
|
|
|
|
virtual Status Sync() override;
|
|
|
|
virtual Status Fsync() override;
|
|
|
|
virtual uint64_t GetFileSize() override;
|
|
|
|
virtual Status InvalidateCache(size_t offset, size_t length) override;
|
2015-10-28 19:55:20 +01:00
|
|
|
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
2015-11-11 02:03:42 +01:00
|
|
|
virtual Status Allocate(uint64_t offset, uint64_t len) override;
|
2015-10-28 19:55:20 +01:00
|
|
|
#endif
|
2015-10-14 10:14:53 +02:00
|
|
|
};
|
|
|
|
|
2016-09-13 21:08:22 +02:00
|
|
|
class PosixRandomRWFile : public RandomRWFile {
|
|
|
|
public:
|
|
|
|
explicit PosixRandomRWFile(const std::string& fname, int fd,
|
|
|
|
const EnvOptions& options);
|
|
|
|
virtual ~PosixRandomRWFile();
|
|
|
|
|
|
|
|
virtual Status Write(uint64_t offset, const Slice& data) override;
|
|
|
|
|
|
|
|
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
|
|
|
char* scratch) const override;
|
|
|
|
|
|
|
|
virtual Status Flush() override;
|
|
|
|
virtual Status Sync() override;
|
|
|
|
virtual Status Fsync() override;
|
|
|
|
virtual Status Close() override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
const std::string filename_;
|
|
|
|
int fd_;
|
|
|
|
};
|
|
|
|
|
2015-10-14 10:14:53 +02:00
|
|
|
class PosixDirectory : public Directory {
|
|
|
|
public:
|
|
|
|
explicit PosixDirectory(int fd) : fd_(fd) {}
|
2015-10-27 20:15:55 +01:00
|
|
|
~PosixDirectory();
|
|
|
|
virtual Status Fsync() override;
|
2015-10-14 10:14:53 +02:00
|
|
|
|
|
|
|
private:
|
|
|
|
int fd_;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace rocksdb
|