diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h
index 50b29167b..3e422d89e 100644
--- a/include/rocksdb/env.h
+++ b/include/rocksdb/env.h
@@ -41,7 +41,7 @@ struct EnvOptions {
   EnvOptions();
 
   // construct from Options
-  EnvOptions(const Options& options);
+  explicit EnvOptions(const Options& options);
 
   // If true, then allow caching of data in environment buffers
   bool use_os_buffer;
@@ -253,6 +253,13 @@ class SequentialFile {
   //
   // REQUIRES: External synchronization
   virtual Status Skip(uint64_t n) = 0;
+
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  virtual Status InvalidateCache(size_t offset, size_t length) {
+    return Status::NotSupported("InvalidateCache not supported.");
+  }
 };
 
 // A file abstraction for randomly reading the contents of a file.
@@ -298,6 +305,12 @@ class RandomAccessFile {
 
   virtual void Hint(AccessPattern pattern) {}
 
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  virtual Status InvalidateCache(size_t offset, size_t length) {
+    return Status::NotSupported("InvalidateCache not supported.");
+  }
 };
 
 // A file abstraction for sequential writing.  The implementation
@@ -347,6 +360,14 @@ class WritableFile {
     *block_size = preallocation_block_size_;
   }
 
+  // Remove any kind of caching of data from the offset to offset+length
+  // of this file. If the length is 0, then it refers to the end of file.
+  // If the system is not caching the file contents, then this is a noop.
+  // This call has no effect on dirty pages in the cache.
+  virtual Status InvalidateCache(size_t offset, size_t length) {
+    return Status::NotSupported("InvalidateCache not supported.");
+  }
+
  protected:
   // PrepareWrite performs any necessary preparation for a write
   // before the write actually occurs.  This allows for pre-allocation
diff --git a/util/env_posix.cc b/util/env_posix.cc
index 50f96e507..db1b51493 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -137,6 +137,15 @@ class PosixSequentialFile: public SequentialFile {
     }
     return Status::OK();
   }
+
+  virtual Status InvalidateCache(size_t offset, size_t length) {
+    // free OS pages; posix_fadvise returns the error number (errno unset)
+    int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
+    if (ret == 0) {
+      return Status::OK();
+    }
+    return IOError(filename_, ret);
+  }
 };
 
 // pread() based random-access
@@ -223,20 +232,30 @@ class PosixRandomAccessFile: public RandomAccessFile {
     }
   }
 
+  virtual Status InvalidateCache(size_t offset, size_t length) {
+    // free OS pages; posix_fadvise returns the error number (errno unset)
+    int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
+    if (ret == 0) {
+      return Status::OK();
+    }
+    return IOError(filename_, ret);
+  }
 };
 
 // mmap() based random-access
 class PosixMmapReadableFile: public RandomAccessFile {
  private:
+  int fd_;
   std::string filename_;
   void* mmapped_region_;
   size_t length_;
 
  public:
   // base[0,length-1] contains the mmapped contents of the file.
-  PosixMmapReadableFile(const std::string& fname, void* base, size_t length,
+  PosixMmapReadableFile(const int fd, const std::string& fname,
+                        void* base, size_t length,
                         const EnvOptions& options)
-      : filename_(fname), mmapped_region_(base), length_(length) {
+      : fd_(fd), filename_(fname), mmapped_region_(base), length_(length) {
     assert(options.use_mmap_reads);
     assert(options.use_os_buffer);
   }
@@ -253,6 +272,14 @@ class PosixMmapReadableFile: public RandomAccessFile {
     }
     return s;
   }
+  virtual Status InvalidateCache(size_t offset, size_t length) {
+    // free OS pages; posix_fadvise returns the error number (errno unset)
+    int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
+    if (ret == 0) {
+      return Status::OK();
+    }
+    return IOError(filename_, ret);
+  }
 };
 
 // We preallocate up to an extra megabyte and use memcpy to append new
@@ -480,6 +507,15 @@ class PosixMmapFile : public WritableFile {
     return file_offset_ + used;
   }
 
+  virtual Status InvalidateCache(size_t offset, size_t length) {
+    // free OS pages; posix_fadvise returns the error number (errno unset)
+    int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
+    if (ret == 0) {
+      return Status::OK();
+    }
+    return IOError(filename_, ret);
+  }
+
 #ifdef OS_LINUX
   virtual Status Allocate(off_t offset, off_t len) {
     TEST_KILL_RANDOM(leveldb_kill_odds);
@@ -644,6 +680,15 @@ class PosixWritableFile : public WritableFile {
     return filesize_;
   }
 
+  virtual Status InvalidateCache(size_t offset, size_t length) {
+    // free OS pages; posix_fadvise returns the error number (errno unset)
+    int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
+    if (ret == 0) {
+      return Status::OK();
+    }
+    return IOError(filename_, ret);
+  }
+
 #ifdef OS_LINUX
   virtual Status Allocate(off_t offset, off_t len) {
     TEST_KILL_RANDOM(leveldb_kill_odds);
@@ -768,7 +813,8 @@ class PosixEnv : public Env {
     if (s.ok()) {
       void* base = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd, 0);
       if (base != MAP_FAILED) {
-        result->reset(new PosixMmapReadableFile(fname, base, size, options));
+        result->reset(new PosixMmapReadableFile(fd, fname, base,
+                                                size, options));
       } else {
         s = IOError(fname, errno);
       }
diff --git a/util/env_test.cc b/util/env_test.cc
index 92b681113..f803bb440 100644
--- a/util/env_test.cc
+++ b/util/env_test.cc
@@ -317,6 +317,46 @@ TEST(EnvPosixTest, RandomAccessUniqueIDDeletes) {
   ASSERT_TRUE(!HasPrefix(ids));
 }
 
+TEST(EnvPosixTest, InvalidateCache) {
+  const EnvOptions soptions;
+  std::string fname = test::TmpDir() + "/" + "testfile";
+
+  // Create file.
+  {
+    unique_ptr<WritableFile> wfile;
+    ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions));
+    ASSERT_OK(wfile.get()->Append(Slice("Hello world")));
+    ASSERT_OK(wfile.get()->InvalidateCache(0, 0));
+    ASSERT_OK(wfile.get()->Close());
+  }
+
+  // Random Read
+  {
+    unique_ptr<RandomAccessFile> file;
+    char scratch[100];
+    Slice result;
+    ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions));
+    ASSERT_OK(file.get()->Read(0, 11, &result, scratch));
+    ASSERT_EQ(memcmp(scratch, "Hello world", 11), 0);
+    ASSERT_OK(file.get()->InvalidateCache(0, 11));
+    ASSERT_OK(file.get()->InvalidateCache(0, 0));
+  }
+
+  // Sequential Read
+  {
+    unique_ptr<SequentialFile> file;
+    char scratch[100];
+    Slice result;
+    ASSERT_OK(env_->NewSequentialFile(fname, &file, soptions));
+    ASSERT_OK(file.get()->Read(11, &result, scratch));
+    ASSERT_EQ(memcmp(scratch, "Hello world", 11), 0);
+    ASSERT_OK(file.get()->InvalidateCache(0, 11));
+    ASSERT_OK(file.get()->InvalidateCache(0, 0));
+  }
+  // Delete the file
+  ASSERT_OK(env_->DeleteFile(fname));
+}
+
 }  // namespace leveldb
 
 int main(int argc, char** argv) {