Added support for sequential read-ahead file (#5580)
Summary: Added support for sequential read-ahead file that can prefetch the read data and later serve it from internal cache buffer. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5580 Differential Revision: D16287082 Pulled By: elipoz fbshipit-source-id: a3e7ad9643d377d39352ff63058ce050ec31dcf3
This commit is contained in:
parent
699a569c52
commit
0f4d90e6e4
@ -492,13 +492,11 @@ inline std::string EncodeInt(uint64_t x) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
class StringEnv : public EnvWrapper {
|
|
||||||
public:
|
|
||||||
class SeqStringSource : public SequentialFile {
|
class SeqStringSource : public SequentialFile {
|
||||||
public:
|
public:
|
||||||
explicit SeqStringSource(const std::string& data)
|
explicit SeqStringSource(const std::string& data)
|
||||||
: data_(data), offset_(0) {}
|
: data_(data), offset_(0) {}
|
||||||
~SeqStringSource() {}
|
~SeqStringSource() override {}
|
||||||
Status Read(size_t n, Slice* result, char* scratch) override {
|
Status Read(size_t n, Slice* result, char* scratch) override {
|
||||||
std::string output;
|
std::string output;
|
||||||
if (offset_ < data_.size()) {
|
if (offset_ < data_.size()) {
|
||||||
@ -527,129 +525,136 @@ class StringEnv : public EnvWrapper {
|
|||||||
size_t offset_;
|
size_t offset_;
|
||||||
};
|
};
|
||||||
|
|
||||||
class StringSink : public WritableFile {
|
class StringEnv : public EnvWrapper {
|
||||||
public:
|
public:
|
||||||
explicit StringSink(std::string* contents)
|
class StringSink : public WritableFile {
|
||||||
: WritableFile(), contents_(contents) {}
|
public:
|
||||||
virtual Status Truncate(uint64_t size) override {
|
explicit StringSink(std::string* contents)
|
||||||
contents_->resize(static_cast<size_t>(size));
|
: WritableFile(), contents_(contents) {}
|
||||||
return Status::OK();
|
virtual Status Truncate(uint64_t size) override {
|
||||||
}
|
contents_->resize(static_cast<size_t>(size));
|
||||||
virtual Status Close() override { return Status::OK(); }
|
return Status::OK();
|
||||||
virtual Status Flush() override { return Status::OK(); }
|
}
|
||||||
virtual Status Sync() override { return Status::OK(); }
|
virtual Status Close() override { return Status::OK(); }
|
||||||
virtual Status Append(const Slice& slice) override {
|
virtual Status Flush() override { return Status::OK(); }
|
||||||
contents_->append(slice.data(), slice.size());
|
virtual Status Sync() override { return Status::OK(); }
|
||||||
|
virtual Status Append(const Slice& slice) override {
|
||||||
|
contents_->append(slice.data(), slice.size());
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string* contents_;
|
||||||
|
};
|
||||||
|
|
||||||
|
explicit StringEnv(Env* t) : EnvWrapper(t) {}
|
||||||
|
~StringEnv() override {}
|
||||||
|
|
||||||
|
const std::string& GetContent(const std::string& f) { return files_[f]; }
|
||||||
|
|
||||||
|
const Status WriteToNewFile(const std::string& file_name,
|
||||||
|
const std::string& content) {
|
||||||
|
std::unique_ptr<WritableFile> r;
|
||||||
|
auto s = NewWritableFile(file_name, &r, EnvOptions());
|
||||||
|
if (!s.ok()) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
r->Append(content);
|
||||||
|
r->Flush();
|
||||||
|
r->Close();
|
||||||
|
assert(files_[file_name] == content);
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
// The following text is boilerplate that forwards all methods to target()
|
||||||
std::string* contents_;
|
Status NewSequentialFile(const std::string& f,
|
||||||
};
|
std::unique_ptr<SequentialFile>* r,
|
||||||
|
|
||||||
explicit StringEnv(Env* t) : EnvWrapper(t) {}
|
|
||||||
virtual ~StringEnv() {}
|
|
||||||
|
|
||||||
const std::string& GetContent(const std::string& f) { return files_[f]; }
|
|
||||||
|
|
||||||
const Status WriteToNewFile(const std::string& file_name,
|
|
||||||
const std::string& content) {
|
|
||||||
std::unique_ptr<WritableFile> r;
|
|
||||||
auto s = NewWritableFile(file_name, &r, EnvOptions());
|
|
||||||
if (!s.ok()) {
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
r->Append(content);
|
|
||||||
r->Flush();
|
|
||||||
r->Close();
|
|
||||||
assert(files_[file_name] == content);
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
// The following text is boilerplate that forwards all methods to target()
|
|
||||||
Status NewSequentialFile(const std::string& f,
|
|
||||||
std::unique_ptr<SequentialFile>* r,
|
|
||||||
const EnvOptions& /*options*/) override {
|
|
||||||
auto iter = files_.find(f);
|
|
||||||
if (iter == files_.end()) {
|
|
||||||
return Status::NotFound("The specified file does not exist", f);
|
|
||||||
}
|
|
||||||
r->reset(new SeqStringSource(iter->second));
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
Status NewRandomAccessFile(const std::string& /*f*/,
|
|
||||||
std::unique_ptr<RandomAccessFile>* /*r*/,
|
|
||||||
const EnvOptions& /*options*/) override {
|
const EnvOptions& /*options*/) override {
|
||||||
return Status::NotSupported();
|
auto iter = files_.find(f);
|
||||||
}
|
if (iter == files_.end()) {
|
||||||
Status NewWritableFile(const std::string& f, std::unique_ptr<WritableFile>* r,
|
return Status::NotFound("The specified file does not exist", f);
|
||||||
const EnvOptions& /*options*/) override {
|
}
|
||||||
auto iter = files_.find(f);
|
r->reset(new SeqStringSource(iter->second));
|
||||||
if (iter != files_.end()) {
|
return Status::OK();
|
||||||
return Status::IOError("The specified file already exists", f);
|
|
||||||
}
|
}
|
||||||
r->reset(new StringSink(&files_[f]));
|
Status NewRandomAccessFile(const std::string& /*f*/,
|
||||||
return Status::OK();
|
std::unique_ptr<RandomAccessFile>* /*r*/,
|
||||||
}
|
const EnvOptions& /*options*/) override {
|
||||||
virtual Status NewDirectory(const std::string& /*name*/,
|
return Status::NotSupported();
|
||||||
std::unique_ptr<Directory>* /*result*/) override {
|
|
||||||
return Status::NotSupported();
|
|
||||||
}
|
|
||||||
Status FileExists(const std::string& f) override {
|
|
||||||
if (files_.find(f) == files_.end()) {
|
|
||||||
return Status::NotFound();
|
|
||||||
}
|
}
|
||||||
return Status::OK();
|
Status NewWritableFile(const std::string& f,
|
||||||
}
|
std::unique_ptr<WritableFile>* r,
|
||||||
Status GetChildren(const std::string& /*dir*/,
|
const EnvOptions& /*options*/) override {
|
||||||
std::vector<std::string>* /*r*/) override {
|
auto iter = files_.find(f);
|
||||||
return Status::NotSupported();
|
if (iter != files_.end()) {
|
||||||
}
|
return Status::IOError("The specified file already exists", f);
|
||||||
Status DeleteFile(const std::string& f) override {
|
}
|
||||||
files_.erase(f);
|
r->reset(new StringSink(&files_[f]));
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
Status CreateDir(const std::string& /*d*/) override {
|
virtual Status NewDirectory(
|
||||||
return Status::NotSupported();
|
const std::string& /*name*/,
|
||||||
}
|
std::unique_ptr<Directory>* /*result*/) override {
|
||||||
Status CreateDirIfMissing(const std::string& /*d*/) override {
|
return Status::NotSupported();
|
||||||
return Status::NotSupported();
|
}
|
||||||
}
|
Status FileExists(const std::string& f) override {
|
||||||
Status DeleteDir(const std::string& /*d*/) override {
|
if (files_.find(f) == files_.end()) {
|
||||||
return Status::NotSupported();
|
return Status::NotFound();
|
||||||
}
|
}
|
||||||
Status GetFileSize(const std::string& f, uint64_t* s) override {
|
return Status::OK();
|
||||||
auto iter = files_.find(f);
|
}
|
||||||
if (iter == files_.end()) {
|
Status GetChildren(const std::string& /*dir*/,
|
||||||
return Status::NotFound("The specified file does not exist:", f);
|
std::vector<std::string>* /*r*/) override {
|
||||||
|
return Status::NotSupported();
|
||||||
|
}
|
||||||
|
Status DeleteFile(const std::string& f) override {
|
||||||
|
files_.erase(f);
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
Status CreateDir(const std::string& /*d*/) override {
|
||||||
|
return Status::NotSupported();
|
||||||
|
}
|
||||||
|
Status CreateDirIfMissing(const std::string& /*d*/) override {
|
||||||
|
return Status::NotSupported();
|
||||||
|
}
|
||||||
|
Status DeleteDir(const std::string& /*d*/) override {
|
||||||
|
return Status::NotSupported();
|
||||||
|
}
|
||||||
|
Status GetFileSize(const std::string& f, uint64_t* s) override {
|
||||||
|
auto iter = files_.find(f);
|
||||||
|
if (iter == files_.end()) {
|
||||||
|
return Status::NotFound("The specified file does not exist:", f);
|
||||||
|
}
|
||||||
|
*s = iter->second.size();
|
||||||
|
return Status::OK();
|
||||||
}
|
}
|
||||||
*s = iter->second.size();
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
Status GetFileModificationTime(const std::string& /*fname*/,
|
Status GetFileModificationTime(const std::string& /*fname*/,
|
||||||
uint64_t* /*file_mtime*/) override {
|
uint64_t* /*file_mtime*/) override {
|
||||||
return Status::NotSupported();
|
return Status::NotSupported();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status RenameFile(const std::string& /*s*/,
|
Status RenameFile(const std::string& /*s*/,
|
||||||
|
const std::string& /*t*/) override {
|
||||||
|
return Status::NotSupported();
|
||||||
|
}
|
||||||
|
|
||||||
|
Status LinkFile(const std::string& /*s*/,
|
||||||
const std::string& /*t*/) override {
|
const std::string& /*t*/) override {
|
||||||
return Status::NotSupported();
|
return Status::NotSupported();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status LinkFile(const std::string& /*s*/, const std::string& /*t*/) override {
|
Status LockFile(const std::string& /*f*/, FileLock** /*l*/) override {
|
||||||
return Status::NotSupported();
|
return Status::NotSupported();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status LockFile(const std::string& /*f*/, FileLock** /*l*/) override {
|
Status UnlockFile(FileLock* /*l*/) override {
|
||||||
return Status::NotSupported();
|
return Status::NotSupported();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status UnlockFile(FileLock* /*l*/) override { return Status::NotSupported(); }
|
protected:
|
||||||
|
std::unordered_map<std::string, std::string> files_;
|
||||||
protected:
|
};
|
||||||
std::unordered_map<std::string, std::string> files_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Randomly initialize the given DBOptions
|
// Randomly initialize the given DBOptions
|
||||||
void RandomInitDBOptions(DBOptions* db_opt, Random* rnd);
|
void RandomInitDBOptions(DBOptions* db_opt, Random* rnd);
|
||||||
|
@ -639,6 +639,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
|||||||
|
|
||||||
Status Read(uint64_t offset, size_t n, Slice* result,
|
Status Read(uint64_t offset, size_t n, Slice* result,
|
||||||
char* scratch) const override {
|
char* scratch) const override {
|
||||||
|
// Read-ahead only makes sense if we have some slack left after reading
|
||||||
if (n + alignment_ >= readahead_size_) {
|
if (n + alignment_ >= readahead_size_) {
|
||||||
return file_->Read(offset, n, result, scratch);
|
return file_->Read(offset, n, result, scratch);
|
||||||
}
|
}
|
||||||
@ -646,14 +647,13 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
|||||||
std::unique_lock<std::mutex> lk(lock_);
|
std::unique_lock<std::mutex> lk(lock_);
|
||||||
|
|
||||||
size_t cached_len = 0;
|
size_t cached_len = 0;
|
||||||
// Check if there is a cache hit, means that [offset, offset + n) is either
|
// Check if there is a cache hit, meaning that [offset, offset + n) is either
|
||||||
// completely or partially in the buffer
|
// completely or partially in the buffer.
|
||||||
// If it's completely cached, including end of file case when offset + n is
|
// If it's completely cached, including end of file case when offset + n is
|
||||||
// greater than EOF, return
|
// greater than EOF, then return.
|
||||||
if (TryReadFromCache(offset, n, &cached_len, scratch) &&
|
if (TryReadFromCache(offset, n, &cached_len, scratch) &&
|
||||||
(cached_len == n ||
|
(cached_len == n || buffer_.CurrentSize() < readahead_size_)) {
|
||||||
// End of file
|
// We read exactly what we needed, or we hit end of file - return.
|
||||||
buffer_.CurrentSize() < readahead_size_)) {
|
|
||||||
*result = Slice(scratch, cached_len);
|
*result = Slice(scratch, cached_len);
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
@ -661,25 +661,14 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
|||||||
// In the case of cache hit advanced_offset is already aligned, means that
|
// In the case of cache hit advanced_offset is already aligned, means that
|
||||||
// chunk_offset equals to advanced_offset
|
// chunk_offset equals to advanced_offset
|
||||||
size_t chunk_offset = TruncateToPageBoundary(alignment_, advanced_offset);
|
size_t chunk_offset = TruncateToPageBoundary(alignment_, advanced_offset);
|
||||||
Slice readahead_result;
|
|
||||||
|
|
||||||
Status s = ReadIntoBuffer(chunk_offset, readahead_size_);
|
Status s = ReadIntoBuffer(chunk_offset, readahead_size_);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
// In the case of cache miss, i.e. when cached_len equals 0, an offset can
|
// The data we need is now in cache, so we can safely read it
|
||||||
// exceed the file end position, so the following check is required
|
size_t remaining_len;
|
||||||
if (advanced_offset < chunk_offset + buffer_.CurrentSize()) {
|
TryReadFromCache(advanced_offset, n - cached_len, &remaining_len,
|
||||||
// In the case of cache miss, the first chunk_padding bytes in buffer_
|
scratch + cached_len);
|
||||||
// are
|
*result = Slice(scratch, cached_len + remaining_len);
|
||||||
// stored for alignment only and must be skipped
|
|
||||||
size_t chunk_padding = advanced_offset - chunk_offset;
|
|
||||||
auto remaining_len =
|
|
||||||
std::min(buffer_.CurrentSize() - chunk_padding, n - cached_len);
|
|
||||||
memcpy(scratch + cached_len, buffer_.BufferStart() + chunk_padding,
|
|
||||||
remaining_len);
|
|
||||||
*result = Slice(scratch, cached_len + remaining_len);
|
|
||||||
} else {
|
|
||||||
*result = Slice(scratch, cached_len);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
@ -690,6 +679,9 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
|||||||
// `Read()` assumes a smaller prefetch buffer indicates EOF was reached.
|
// `Read()` assumes a smaller prefetch buffer indicates EOF was reached.
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_lock<std::mutex> lk(lock_);
|
||||||
|
|
||||||
size_t offset_ = static_cast<size_t>(offset);
|
size_t offset_ = static_cast<size_t>(offset);
|
||||||
size_t prefetch_offset = TruncateToPageBoundary(alignment_, offset_);
|
size_t prefetch_offset = TruncateToPageBoundary(alignment_, offset_);
|
||||||
if (prefetch_offset == buffer_offset_) {
|
if (prefetch_offset == buffer_offset_) {
|
||||||
@ -706,12 +698,18 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
|||||||
void Hint(AccessPattern pattern) override { file_->Hint(pattern); }
|
void Hint(AccessPattern pattern) override { file_->Hint(pattern); }
|
||||||
|
|
||||||
Status InvalidateCache(size_t offset, size_t length) override {
|
Status InvalidateCache(size_t offset, size_t length) override {
|
||||||
|
std::unique_lock<std::mutex> lk(lock_);
|
||||||
|
buffer_.Clear();
|
||||||
return file_->InvalidateCache(offset, length);
|
return file_->InvalidateCache(offset, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool use_direct_io() const override { return file_->use_direct_io(); }
|
bool use_direct_io() const override { return file_->use_direct_io(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// Tries to read from buffer_ n bytes starting at offset. If anything was read
|
||||||
|
// from the cache, it sets cached_len to the number of bytes actually read,
|
||||||
|
// copies this number of bytes to scratch and returns true.
|
||||||
|
// If nothing was read sets cached_len to 0 and returns false.
|
||||||
bool TryReadFromCache(uint64_t offset, size_t n, size_t* cached_len,
|
bool TryReadFromCache(uint64_t offset, size_t n, size_t* cached_len,
|
||||||
char* scratch) const {
|
char* scratch) const {
|
||||||
if (offset < buffer_offset_ ||
|
if (offset < buffer_offset_ ||
|
||||||
@ -726,6 +724,9 @@ private:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reads into buffer_ the next n bytes from file_ starting at offset.
|
||||||
|
// Can actually read less if EOF was reached.
|
||||||
|
// Returns the status of the read operation on the file.
|
||||||
Status ReadIntoBuffer(uint64_t offset, size_t n) const {
|
Status ReadIntoBuffer(uint64_t offset, size_t n) const {
|
||||||
if (n > buffer_.Capacity()) {
|
if (n > buffer_.Capacity()) {
|
||||||
n = buffer_.Capacity();
|
n = buffer_.Capacity();
|
||||||
@ -742,14 +743,171 @@ private:
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<RandomAccessFile> file_;
|
const std::unique_ptr<RandomAccessFile> file_;
|
||||||
const size_t alignment_;
|
const size_t alignment_;
|
||||||
size_t readahead_size_;
|
const size_t readahead_size_;
|
||||||
|
|
||||||
mutable std::mutex lock_;
|
mutable std::mutex lock_;
|
||||||
|
// The buffer storing the prefetched data
|
||||||
mutable AlignedBuffer buffer_;
|
mutable AlignedBuffer buffer_;
|
||||||
|
// The offset in file_, corresponding to data stored in buffer_
|
||||||
mutable uint64_t buffer_offset_;
|
mutable uint64_t buffer_offset_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// This class wraps a SequentialFile, exposing same API, with the difference
|
||||||
|
// of being able to prefetch up to readahead_size bytes and then serve them
|
||||||
|
// from memory, avoiding the entire round-trip if, for example, the data for the
|
||||||
|
// file is actually remote.
|
||||||
|
class ReadaheadSequentialFile : public SequentialFile {
|
||||||
|
public:
|
||||||
|
ReadaheadSequentialFile(std::unique_ptr<SequentialFile>&& file,
|
||||||
|
size_t readahead_size)
|
||||||
|
: file_(std::move(file)),
|
||||||
|
alignment_(file_->GetRequiredBufferAlignment()),
|
||||||
|
readahead_size_(Roundup(readahead_size, alignment_)),
|
||||||
|
buffer_(),
|
||||||
|
buffer_offset_(0),
|
||||||
|
read_offset_(0) {
|
||||||
|
buffer_.Alignment(alignment_);
|
||||||
|
buffer_.AllocateNewBuffer(readahead_size_);
|
||||||
|
}
|
||||||
|
|
||||||
|
ReadaheadSequentialFile(const ReadaheadSequentialFile&) = delete;
|
||||||
|
|
||||||
|
ReadaheadSequentialFile& operator=(const ReadaheadSequentialFile&) = delete;
|
||||||
|
|
||||||
|
Status Read(size_t n, Slice* result, char* scratch) override {
|
||||||
|
std::unique_lock<std::mutex> lk(lock_);
|
||||||
|
|
||||||
|
size_t cached_len = 0;
|
||||||
|
// Check if there is a cache hit, meaning that [offset, offset + n) is
|
||||||
|
// either completely or partially in the buffer. If it's completely cached,
|
||||||
|
// including end of file case when offset + n is greater than EOF, then
|
||||||
|
// return.
|
||||||
|
if (TryReadFromCache(n, &cached_len, scratch) &&
|
||||||
|
(cached_len == n || buffer_.CurrentSize() < readahead_size_)) {
|
||||||
|
// We read exactly what we needed, or we hit end of file - return.
|
||||||
|
*result = Slice(scratch, cached_len);
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
n -= cached_len;
|
||||||
|
|
||||||
|
Status s;
|
||||||
|
// Read-ahead only makes sense if we have some slack left after reading
|
||||||
|
if (n + alignment_ >= readahead_size_) {
|
||||||
|
s = file_->Read(n, result, scratch + cached_len);
|
||||||
|
if (s.ok()) {
|
||||||
|
read_offset_ += result->size();
|
||||||
|
*result = Slice(scratch, cached_len + result->size());
|
||||||
|
}
|
||||||
|
buffer_.Clear();
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
s = ReadIntoBuffer(readahead_size_);
|
||||||
|
if (s.ok()) {
|
||||||
|
// The data we need is now in cache, so we can safely read it
|
||||||
|
size_t remaining_len;
|
||||||
|
TryReadFromCache(n, &remaining_len, scratch + cached_len);
|
||||||
|
*result = Slice(scratch, cached_len + remaining_len);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
Status Skip(uint64_t n) override {
|
||||||
|
std::unique_lock<std::mutex> lk(lock_);
|
||||||
|
Status s = Status::OK();
|
||||||
|
// First check if we need to skip already cached data
|
||||||
|
if (buffer_.CurrentSize() > 0) {
|
||||||
|
// Do we need to skip beyond cached data?
|
||||||
|
if (read_offset_ + n >= buffer_offset_ + buffer_.CurrentSize()) {
|
||||||
|
// Yes. Skip whatever is in memory and adjust offset accordingly
|
||||||
|
n -= buffer_offset_ + buffer_.CurrentSize() - read_offset_;
|
||||||
|
read_offset_ = buffer_offset_ + buffer_.CurrentSize();
|
||||||
|
} else {
|
||||||
|
// No. The entire section to be skipped is entirely in cache.
|
||||||
|
read_offset_ += n;
|
||||||
|
n = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (n > 0) {
|
||||||
|
// We still need to skip more, so call the file API for skipping
|
||||||
|
s = file_->Skip(n);
|
||||||
|
if (s.ok()) {
|
||||||
|
read_offset_ += n;
|
||||||
|
}
|
||||||
|
buffer_.Clear();
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
Status PositionedRead(uint64_t offset, size_t n, Slice* result,
|
||||||
|
char* scratch) override {
|
||||||
|
return file_->PositionedRead(offset, n, result, scratch);
|
||||||
|
}
|
||||||
|
|
||||||
|
Status InvalidateCache(size_t offset, size_t length) override {
|
||||||
|
std::unique_lock<std::mutex> lk(lock_);
|
||||||
|
buffer_.Clear();
|
||||||
|
return file_->InvalidateCache(offset, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool use_direct_io() const override { return file_->use_direct_io(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Tries to read from buffer_ n bytes. If anything was read from the cache, it
|
||||||
|
// sets cached_len to the number of bytes actually read, copies this number
|
||||||
|
// of bytes to scratch and returns true.
|
||||||
|
// If nothing was read sets cached_len to 0 and returns false.
|
||||||
|
bool TryReadFromCache(size_t n, size_t* cached_len, char* scratch) {
|
||||||
|
if (read_offset_ < buffer_offset_ ||
|
||||||
|
read_offset_ >= buffer_offset_ + buffer_.CurrentSize()) {
|
||||||
|
*cached_len = 0;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
uint64_t offset_in_buffer = read_offset_ - buffer_offset_;
|
||||||
|
*cached_len = std::min(
|
||||||
|
buffer_.CurrentSize() - static_cast<size_t>(offset_in_buffer), n);
|
||||||
|
memcpy(scratch, buffer_.BufferStart() + offset_in_buffer, *cached_len);
|
||||||
|
read_offset_ += *cached_len;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads into buffer_ the next n bytes from file_.
|
||||||
|
// Can actually read less if EOF was reached.
|
||||||
|
// Returns the status of the read operation on the file.
|
||||||
|
Status ReadIntoBuffer(size_t n) {
|
||||||
|
if (n > buffer_.Capacity()) {
|
||||||
|
n = buffer_.Capacity();
|
||||||
|
}
|
||||||
|
assert(IsFileSectorAligned(n, alignment_));
|
||||||
|
Slice result;
|
||||||
|
Status s = file_->Read(n, &result, buffer_.BufferStart());
|
||||||
|
if (s.ok()) {
|
||||||
|
buffer_offset_ = read_offset_;
|
||||||
|
buffer_.Size(result.size());
|
||||||
|
assert(buffer_.BufferStart() == result.data());
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::unique_ptr<SequentialFile> file_;
|
||||||
|
const size_t alignment_;
|
||||||
|
const size_t readahead_size_;
|
||||||
|
|
||||||
|
std::mutex lock_;
|
||||||
|
// The buffer storing the prefetched data
|
||||||
|
AlignedBuffer buffer_;
|
||||||
|
// The offset in file_, corresponding to data stored in buffer_
|
||||||
|
uint64_t buffer_offset_;
|
||||||
|
// The offset up to which data was read from file_. In fact, it can be larger
|
||||||
|
// than the actual file size, since the file_->Skip(n) call doesn't return the
|
||||||
|
// actual number of bytes that were skipped, which can be less than n.
|
||||||
|
// This is not a problem, since read_offset_ is monotonically increasing and
|
||||||
|
// its only use is to figure out if next piece of data should be read from
|
||||||
|
// buffer_ or file_ directly.
|
||||||
|
uint64_t read_offset_;
|
||||||
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
|
Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
|
||||||
@ -866,6 +1024,14 @@ std::unique_ptr<RandomAccessFile> NewReadaheadRandomAccessFile(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<SequentialFile>
|
||||||
|
SequentialFileReader::NewReadaheadSequentialFile(
|
||||||
|
std::unique_ptr<SequentialFile>&& file, size_t readahead_size) {
|
||||||
|
std::unique_ptr<SequentialFile> result(
|
||||||
|
new ReadaheadSequentialFile(std::move(file), readahead_size));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
Status NewWritableFile(Env* env, const std::string& fname,
|
Status NewWritableFile(Env* env, const std::string& fname,
|
||||||
std::unique_ptr<WritableFile>* result,
|
std::unique_ptr<WritableFile>* result,
|
||||||
const EnvOptions& options) {
|
const EnvOptions& options) {
|
||||||
|
@ -43,12 +43,18 @@ class SequentialFileReader {
|
|||||||
private:
|
private:
|
||||||
std::unique_ptr<SequentialFile> file_;
|
std::unique_ptr<SequentialFile> file_;
|
||||||
std::string file_name_;
|
std::string file_name_;
|
||||||
std::atomic<size_t> offset_; // read offset
|
std::atomic<size_t> offset_{0}; // read offset
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit SequentialFileReader(std::unique_ptr<SequentialFile>&& _file,
|
explicit SequentialFileReader(std::unique_ptr<SequentialFile>&& _file,
|
||||||
const std::string& _file_name)
|
const std::string& _file_name)
|
||||||
: file_(std::move(_file)), file_name_(_file_name), offset_(0) {}
|
: file_(std::move(_file)), file_name_(_file_name) {}
|
||||||
|
|
||||||
|
explicit SequentialFileReader(std::unique_ptr<SequentialFile>&& _file,
|
||||||
|
const std::string& _file_name,
|
||||||
|
size_t _readahead_size)
|
||||||
|
: file_(NewReadaheadSequentialFile(std::move(_file), _readahead_size)),
|
||||||
|
file_name_(_file_name) {}
|
||||||
|
|
||||||
SequentialFileReader(SequentialFileReader&& o) ROCKSDB_NOEXCEPT {
|
SequentialFileReader(SequentialFileReader&& o) ROCKSDB_NOEXCEPT {
|
||||||
*this = std::move(o);
|
*this = std::move(o);
|
||||||
@ -66,13 +72,17 @@ class SequentialFileReader {
|
|||||||
|
|
||||||
Status Skip(uint64_t n);
|
Status Skip(uint64_t n);
|
||||||
|
|
||||||
void Rewind();
|
|
||||||
|
|
||||||
SequentialFile* file() { return file_.get(); }
|
SequentialFile* file() { return file_.get(); }
|
||||||
|
|
||||||
std::string file_name() { return file_name_; }
|
std::string file_name() { return file_name_; }
|
||||||
|
|
||||||
bool use_direct_io() const { return file_->use_direct_io(); }
|
bool use_direct_io() const { return file_->use_direct_io(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
// NewReadaheadSequentialFile provides a wrapper over SequentialFile to
|
||||||
|
// always prefetch additional data with every read.
|
||||||
|
static std::unique_ptr<SequentialFile> NewReadaheadSequentialFile(
|
||||||
|
std::unique_ptr<SequentialFile>&& file, size_t readahead_size);
|
||||||
};
|
};
|
||||||
|
|
||||||
// RandomAccessFileReader is a wrapper on top of Env::RandomAccessFile. It is
|
// RandomAccessFileReader is a wrapper on top of Env::RandomAccessFile. It is
|
||||||
|
@ -275,7 +275,7 @@ TEST_P(ReadaheadRandomAccessFileTest, SourceStrLenLessThanReadaheadSizeTest) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(ReadaheadRandomAccessFileTest,
|
TEST_P(ReadaheadRandomAccessFileTest,
|
||||||
SourceStrLenCanBeGreaterThanReadaheadSizeTest) {
|
SourceStrLenGreaterThanReadaheadSizeTest) {
|
||||||
Random rng(42);
|
Random rng(42);
|
||||||
for (int k = 0; k < 100; ++k) {
|
for (int k = 0; k < 100; ++k) {
|
||||||
size_t strLen = k * GetReadaheadSize() +
|
size_t strLen = k * GetReadaheadSize() +
|
||||||
@ -286,13 +286,13 @@ TEST_P(ReadaheadRandomAccessFileTest,
|
|||||||
for (int test = 1; test <= 100; ++test) {
|
for (int test = 1; test <= 100; ++test) {
|
||||||
size_t offset = rng.Uniform(static_cast<int>(strLen));
|
size_t offset = rng.Uniform(static_cast<int>(strLen));
|
||||||
size_t n = rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
size_t n = rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||||
ASSERT_EQ(str.substr(offset, std::min(n, str.size() - offset)),
|
ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)),
|
||||||
Read(offset, n));
|
Read(offset, n));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(ReadaheadRandomAccessFileTest, NExceedReadaheadTest) {
|
TEST_P(ReadaheadRandomAccessFileTest, ReadExceedsReadaheadSizeTest) {
|
||||||
Random rng(7);
|
Random rng(7);
|
||||||
size_t strLen = 4 * GetReadaheadSize() +
|
size_t strLen = 4 * GetReadaheadSize() +
|
||||||
rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||||
@ -303,7 +303,7 @@ TEST_P(ReadaheadRandomAccessFileTest, NExceedReadaheadTest) {
|
|||||||
size_t offset = rng.Uniform(static_cast<int>(strLen));
|
size_t offset = rng.Uniform(static_cast<int>(strLen));
|
||||||
size_t n =
|
size_t n =
|
||||||
GetReadaheadSize() + rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
GetReadaheadSize() + rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||||
ASSERT_EQ(str.substr(offset, std::min(n, str.size() - offset)),
|
ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)),
|
||||||
Read(offset, n));
|
Read(offset, n));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -315,13 +315,118 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
SourceStrLenLessThanReadaheadSizeTest, ReadaheadRandomAccessFileTest,
|
SourceStrLenLessThanReadaheadSizeTest, ReadaheadRandomAccessFileTest,
|
||||||
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
SourceStrLenCanBeGreaterThanReadaheadSizeTest,
|
SourceStrLenGreaterThanReadaheadSizeTest, ReadaheadRandomAccessFileTest,
|
||||||
ReadaheadRandomAccessFileTest,
|
|
||||||
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
NExceedReadaheadTest, ReadaheadRandomAccessFileTest,
|
ReadExceedsReadaheadSizeTest, ReadaheadRandomAccessFileTest,
|
||||||
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
::testing::ValuesIn(ReadaheadRandomAccessFileTest::GetReadaheadSizeList()));
|
||||||
|
|
||||||
|
class ReadaheadSequentialFileTest : public testing::Test,
|
||||||
|
public testing::WithParamInterface<size_t> {
|
||||||
|
public:
|
||||||
|
static std::vector<size_t> GetReadaheadSizeList() {
|
||||||
|
return {1lu << 12, 1lu << 16};
|
||||||
|
}
|
||||||
|
void SetUp() override {
|
||||||
|
readahead_size_ = GetParam();
|
||||||
|
scratch_.reset(new char[2 * readahead_size_]);
|
||||||
|
ResetSourceStr();
|
||||||
|
}
|
||||||
|
ReadaheadSequentialFileTest() {}
|
||||||
|
std::string Read(size_t n) {
|
||||||
|
Slice result;
|
||||||
|
test_read_holder_->Read(n, &result, scratch_.get());
|
||||||
|
return std::string(result.data(), result.size());
|
||||||
|
}
|
||||||
|
void Skip(size_t n) { test_read_holder_->Skip(n); }
|
||||||
|
void ResetSourceStr(const std::string& str = "") {
|
||||||
|
auto read_holder =
|
||||||
|
std::unique_ptr<SequentialFile>(new test::SeqStringSource(str));
|
||||||
|
test_read_holder_.reset(new SequentialFileReader(std::move(read_holder),
|
||||||
|
"test", readahead_size_));
|
||||||
|
}
|
||||||
|
size_t GetReadaheadSize() const { return readahead_size_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
size_t readahead_size_;
|
||||||
|
std::unique_ptr<SequentialFileReader> test_read_holder_;
|
||||||
|
std::unique_ptr<char[]> scratch_;
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST_P(ReadaheadSequentialFileTest, EmptySourceStrTest) {
|
||||||
|
ASSERT_EQ("", Read(0));
|
||||||
|
ASSERT_EQ("", Read(1));
|
||||||
|
ASSERT_EQ("", Read(13));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(ReadaheadSequentialFileTest, SourceStrLenLessThanReadaheadSizeTest) {
|
||||||
|
std::string str = "abcdefghijklmnopqrs";
|
||||||
|
ResetSourceStr(str);
|
||||||
|
ASSERT_EQ(str.substr(0, 3), Read(3));
|
||||||
|
ASSERT_EQ(str.substr(3, 1), Read(1));
|
||||||
|
ASSERT_EQ(str.substr(4), Read(str.size()));
|
||||||
|
ASSERT_EQ("", Read(100));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(ReadaheadSequentialFileTest, SourceStrLenGreaterThanReadaheadSizeTest) {
|
||||||
|
Random rng(42);
|
||||||
|
for (int s = 0; s < 1; ++s) {
|
||||||
|
for (int k = 0; k < 100; ++k) {
|
||||||
|
size_t strLen = k * GetReadaheadSize() +
|
||||||
|
rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||||
|
std::string str =
|
||||||
|
test::RandomHumanReadableString(&rng, static_cast<int>(strLen));
|
||||||
|
ResetSourceStr(str);
|
||||||
|
size_t offset = 0;
|
||||||
|
for (int test = 1; test <= 100; ++test) {
|
||||||
|
size_t n = rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||||
|
if (s && test % 2) {
|
||||||
|
Skip(n);
|
||||||
|
} else {
|
||||||
|
ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), Read(n));
|
||||||
|
}
|
||||||
|
offset = std::min(offset + n, strLen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(ReadaheadSequentialFileTest, ReadExceedsReadaheadSizeTest) {
|
||||||
|
Random rng(42);
|
||||||
|
for (int s = 0; s < 1; ++s) {
|
||||||
|
for (int k = 0; k < 100; ++k) {
|
||||||
|
size_t strLen = k * GetReadaheadSize() +
|
||||||
|
rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||||
|
std::string str =
|
||||||
|
test::RandomHumanReadableString(&rng, static_cast<int>(strLen));
|
||||||
|
ResetSourceStr(str);
|
||||||
|
size_t offset = 0;
|
||||||
|
for (int test = 1; test <= 100; ++test) {
|
||||||
|
size_t n = GetReadaheadSize() +
|
||||||
|
rng.Uniform(static_cast<int>(GetReadaheadSize()));
|
||||||
|
if (s && test % 2) {
|
||||||
|
Skip(n);
|
||||||
|
} else {
|
||||||
|
ASSERT_EQ(str.substr(offset, std::min(n, strLen - offset)), Read(n));
|
||||||
|
}
|
||||||
|
offset = std::min(offset + n, strLen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
EmptySourceStrTest, ReadaheadSequentialFileTest,
|
||||||
|
::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList()));
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
SourceStrLenLessThanReadaheadSizeTest, ReadaheadSequentialFileTest,
|
||||||
|
::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList()));
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
SourceStrLenGreaterThanReadaheadSizeTest, ReadaheadSequentialFileTest,
|
||||||
|
::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList()));
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
ReadExceedsReadaheadSizeTest, ReadaheadSequentialFileTest,
|
||||||
|
::testing::ValuesIn(ReadaheadSequentialFileTest::GetReadaheadSizeList()));
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user