readahead backwards from sst end
Summary: prefetch some data from the end of the file for each compaction to reduce IO. Closes https://github.com/facebook/rocksdb/pull/2149 Differential Revision: D4880576 Pulled By: lightmark fbshipit-source-id: aa767cd1afc84c541837fbf1ad6c0d45b34d3932
This commit is contained in:
parent
8d7edd5908
commit
95c5e2dc6e
@ -472,6 +472,7 @@ class SequentialFile {
|
||||
// A file abstraction for randomly reading the contents of a file.
|
||||
class RandomAccessFile {
|
||||
public:
|
||||
|
||||
RandomAccessFile() { }
|
||||
virtual ~RandomAccessFile();
|
||||
|
||||
@ -488,6 +489,11 @@ class RandomAccessFile {
|
||||
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const = 0;
|
||||
|
||||
// Readahead the file starting from offset by n bytes for caching.
|
||||
virtual Status Prefetch(uint64_t offset, size_t n) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Used by the file_reader_writer to decide if the ReadAhead wrapper
|
||||
// should simply forward the call and not enact buffering or locking.
|
||||
virtual bool ShouldForwardRawRequest() const {
|
||||
|
@ -530,7 +530,12 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
||||
table_reader->reset();
|
||||
|
||||
Footer footer;
|
||||
auto s = ReadFooterFromFile(file.get(), file_size, &footer,
|
||||
|
||||
// Before reading the footer, read ahead backwards to prefetch data
|
||||
Status s =
|
||||
file->Prefetch((file_size < 512 * 1024 ? 0 : file_size - 512 * 1024),
|
||||
512 * 1024 /* 512 KB prefetching */);
|
||||
s = ReadFooterFromFile(file.get(), file_size, &footer,
|
||||
kBlockBasedTableMagicNumber);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
@ -541,8 +546,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
||||
"version of RocksDB?");
|
||||
}
|
||||
|
||||
// We've successfully read the footer and the index block: we're
|
||||
// ready to serve requests.
|
||||
// We've successfully read the footer. We are ready to serve requests.
|
||||
// Better not mutate rep_ after the creation. eg. internal_prefix_transform
|
||||
// raw pointer will be used to create HashIndexReader, whose reset may
|
||||
// access a dangling pointer.
|
||||
|
@ -472,7 +472,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||
// completely or partially in the buffer
|
||||
// If it's completely cached, including end of file case when offset + n is
|
||||
// greater than EOF, return
|
||||
if (TryReadFromCache_(offset, n, &cached_len, scratch) &&
|
||||
if (TryReadFromCache(offset, n, &cached_len, scratch) &&
|
||||
(cached_len == n ||
|
||||
// End of file
|
||||
buffer_len_ < readahead_size_)) {
|
||||
@ -484,34 +484,34 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||
// chunk_offset equals to advanced_offset
|
||||
size_t chunk_offset = TruncateToPageBoundary(alignment_, advanced_offset);
|
||||
Slice readahead_result;
|
||||
Status s = file_->Read(chunk_offset, readahead_size_, &readahead_result,
|
||||
buffer_.BufferStart());
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
// In the case of cache miss, i.e. when cached_len equals 0, an offset can
|
||||
// exceed the file end position, so the following check is required
|
||||
if (advanced_offset < chunk_offset + readahead_result.size()) {
|
||||
// In the case of cache miss, the first chunk_padding bytes in buffer_ are
|
||||
// stored for alignment only and must be skipped
|
||||
size_t chunk_padding = advanced_offset - chunk_offset;
|
||||
auto remaining_len =
|
||||
std::min(readahead_result.size() - chunk_padding, n - cached_len);
|
||||
memcpy(scratch + cached_len, readahead_result.data() + chunk_padding,
|
||||
remaining_len);
|
||||
*result = Slice(scratch, cached_len + remaining_len);
|
||||
} else {
|
||||
*result = Slice(scratch, cached_len);
|
||||
}
|
||||
|
||||
if (readahead_result.data() == buffer_.BufferStart()) {
|
||||
buffer_offset_ = chunk_offset;
|
||||
buffer_len_ = readahead_result.size();
|
||||
} else {
|
||||
buffer_len_ = 0;
|
||||
Status s = ReadIntoBuffer(chunk_offset, readahead_size_);
|
||||
if (s.ok()) {
|
||||
// In the case of cache miss, i.e. when cached_len equals 0, an offset can
|
||||
// exceed the file end position, so the following check is required
|
||||
if (advanced_offset < chunk_offset + buffer_len_) {
|
||||
// In the case of cache miss, the first chunk_padding bytes in buffer_
|
||||
// are
|
||||
// stored for alignment only and must be skipped
|
||||
size_t chunk_padding = advanced_offset - chunk_offset;
|
||||
auto remaining_len =
|
||||
std::min(buffer_len_ - chunk_padding, n - cached_len);
|
||||
memcpy(scratch + cached_len, buffer_.BufferStart() + chunk_padding,
|
||||
remaining_len);
|
||||
*result = Slice(scratch, cached_len + remaining_len);
|
||||
} else {
|
||||
*result = Slice(scratch, cached_len);
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
// Loads the readahead buffer with file data covering [offset, offset + n).
// The read starts at `offset` truncated by TruncateToPageBoundary() and its
// length is capped to the buffer capacity by ReadIntoBuffer().
// Returns OK without touching the file when the buffer already holds data
// starting at the same truncated offset; otherwise returns the status of the
// underlying read.
virtual Status Prefetch(uint64_t offset, size_t n) override {
  size_t prefetch_offset = TruncateToPageBoundary(alignment_, offset);
  // Only skip when the buffer really contains data at this offset. An empty
  // buffer whose offset merely happens to match caches nothing (every lookup
  // in TryReadFromCache misses when buffer_len_ is 0).
  if (prefetch_offset == buffer_offset_ && buffer_len_ > 0) {
    return Status::OK();
  }
  // Bytes needed to cover the request, including the alignment padding that
  // precedes `offset`.
  size_t prefetch_len = static_cast<size_t>(offset - prefetch_offset) + n;
  // Read() interprets `buffer_len_ < readahead_size_` as "end of file was
  // reached". Asking for fewer than readahead_size_ bytes would leave the
  // buffer short in the middle of the file and make a later Read() return
  // truncated data, so always request at least a full readahead window.
  // NOTE(review): assumes buffer_.Capacity() >= readahead_size_ - confirm.
  if (prefetch_len < readahead_size_) {
    prefetch_len = readahead_size_;
  }
  // NOTE(review): this class declares lock_, but the buffer is modified here
  // without taking it; confirm whether Read() holds lock_ and whether
  // Prefetch() can run concurrently with it.
  return ReadIntoBuffer(prefetch_offset, prefetch_len);
}
|
||||
|
||||
virtual size_t GetUniqueId(char* id, size_t max_size) const override {
|
||||
@ -529,7 +529,7 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||
}
|
||||
|
||||
private:
|
||||
bool TryReadFromCache_(uint64_t offset, size_t n, size_t* cached_len,
|
||||
bool TryReadFromCache(uint64_t offset, size_t n, size_t* cached_len,
|
||||
char* scratch) const {
|
||||
if (offset < buffer_offset_ || offset >= buffer_offset_ + buffer_len_) {
|
||||
*cached_len = 0;
|
||||
@ -542,15 +542,28 @@ class ReadaheadRandomAccessFile : public RandomAccessFile {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads up to n bytes at `offset` from the wrapped file into buffer_.
// The request is clamped to the buffer capacity. On success the cached window
// (buffer_offset_, buffer_len_) is updated to describe what was read; on
// failure the previous window is left untouched. Returns the read status.
Status ReadIntoBuffer(uint64_t offset, size_t n) const {
  // Never ask for more than the buffer can hold.
  size_t read_len = n;
  if (read_len > buffer_.Capacity()) {
    read_len = buffer_.Capacity();
  }

  Slice filled;
  Status read_status =
      file_->Read(offset, read_len, &filled, buffer_.BufferStart());
  if (!read_status.ok()) {
    return read_status;
  }

  // Record the new cached window; the read may return fewer bytes than asked.
  buffer_offset_ = offset;
  buffer_len_ = filled.size();
  return read_status;
}
|
||||
|
||||
std::unique_ptr<RandomAccessFile> file_;
|
||||
const size_t alignment_;
|
||||
size_t readahead_size_;
|
||||
const bool forward_calls_;
|
||||
|
||||
mutable std::mutex lock_;
|
||||
mutable std::mutex lock_;
|
||||
mutable AlignedBuffer buffer_;
|
||||
mutable uint64_t buffer_offset_;
|
||||
mutable size_t buffer_len_;
|
||||
mutable uint64_t buffer_offset_;
|
||||
mutable size_t buffer_len_;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
|
@ -92,6 +92,10 @@ class RandomAccessFileReader {
|
||||
|
||||
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
|
||||
|
||||
// Forwards a prefetch request for n bytes starting at `offset` to the wrapped
// file and returns that file's status unchanged.
Status Prefetch(uint64_t offset, size_t n) const {
  Status prefetch_status = file_->Prefetch(offset, n);
  return prefetch_status;
}
|
||||
|
||||
RandomAccessFile* file() { return file_.get(); }
|
||||
|
||||
bool use_direct_io() const { return file_->use_direct_io(); }
|
||||
|
Loading…
Reference in New Issue
Block a user