Combine the read-ahead logic for user reads and compaction reads (#5431)
Summary: Currently the read-ahead logic for user reads and compaction reads go through different code paths where compaction reads create new table readers and use `ReadaheadRandomAccessFile`. This change is to unify read-ahead logic to use read-ahead in BlockBasedTableReader::InitDataBlock(). As a result of the change `ReadAheadRandomAccessFile` class and `new_table_reader_for_compaction_inputs` option will no longer be used. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5431 Test Plan: make check Here is the benchmarking - https://gist.github.com/vjnadimpalli/083cf423f7b6aa12dcdb14c858bc18a5 Differential Revision: D15772533 Pulled By: vjnadimpalli fbshipit-source-id: b71dca710590471ede6fb37553388654e2e479b9
This commit is contained in:
parent
fe90ed7a70
commit
24b118ad98
@ -497,14 +497,14 @@ TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
|
||||
|
||||
// Create new iterator for:
|
||||
// (1) 1 for verifying flush results
|
||||
// (2) 3 for compaction input files
|
||||
// (3) 1 for verifying compaction results.
|
||||
ASSERT_EQ(num_new_table_reader, 5);
|
||||
// (2) 1 for verifying compaction results.
|
||||
// (3) New TableReaders will not be created for compaction inputs
|
||||
ASSERT_EQ(num_new_table_reader, 2);
|
||||
|
||||
num_table_cache_lookup = 0;
|
||||
num_new_table_reader = 0;
|
||||
ASSERT_EQ(Key(1), Get(Key(1)));
|
||||
ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 3);
|
||||
ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 5);
|
||||
ASSERT_EQ(num_new_table_reader, 0);
|
||||
|
||||
num_table_cache_lookup = 0;
|
||||
@ -519,13 +519,14 @@ TEST_F(DBCompactionTest, TestTableReaderForCompaction) {
|
||||
// May preload table cache too.
|
||||
ASSERT_GE(num_table_cache_lookup, 1);
|
||||
old_num_table_cache_lookup2 = num_table_cache_lookup;
|
||||
// One for compaction input, one for verifying compaction results.
|
||||
ASSERT_EQ(num_new_table_reader, 2);
|
||||
// One for verifying compaction results.
|
||||
// No new iterator created for compaction.
|
||||
ASSERT_EQ(num_new_table_reader, 1);
|
||||
|
||||
num_table_cache_lookup = 0;
|
||||
num_new_table_reader = 0;
|
||||
ASSERT_EQ(Key(1), Get(Key(1)));
|
||||
ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 2);
|
||||
ASSERT_EQ(num_table_cache_lookup + old_num_table_cache_lookup2, 3);
|
||||
ASSERT_EQ(num_new_table_reader, 0);
|
||||
|
||||
rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
|
||||
@ -4339,12 +4340,6 @@ TEST_P(DBCompactionDirectIOTest, DirectIO) {
|
||||
options.env = new MockEnv(Env::Default());
|
||||
Reopen(options);
|
||||
bool readahead = false;
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"TableCache::NewIterator:for_compaction", [&](void* arg) {
|
||||
bool* use_direct_reads = static_cast<bool*>(arg);
|
||||
ASSERT_EQ(*use_direct_reads,
|
||||
options.use_direct_reads);
|
||||
});
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"CompactionJob::OpenCompactionOutputFile", [&](void* arg) {
|
||||
bool* use_direct_writes = static_cast<bool*>(arg);
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
#include "monitoring/perf_context_imp.h"
|
||||
#include "rocksdb/statistics.h"
|
||||
#include "table/block_based/block_based_table_reader.h"
|
||||
#include "table/get_context.h"
|
||||
#include "table/internal_iterator.h"
|
||||
#include "table/iterator_wrapper.h"
|
||||
@ -43,13 +44,6 @@ static void UnrefEntry(void* arg1, void* arg2) {
|
||||
cache->Release(h);
|
||||
}
|
||||
|
||||
static void DeleteTableReader(void* arg1, void* arg2) {
|
||||
TableReader* table_reader = reinterpret_cast<TableReader*>(arg1);
|
||||
Statistics* stats = reinterpret_cast<Statistics*>(arg2);
|
||||
RecordTick(stats, NO_FILE_CLOSES);
|
||||
delete table_reader;
|
||||
}
|
||||
|
||||
static Slice GetSliceForFileNumber(const uint64_t* file_number) {
|
||||
return Slice(reinterpret_cast<const char*>(file_number),
|
||||
sizeof(*file_number));
|
||||
@ -96,8 +90,8 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
|
||||
Status TableCache::GetTableReader(
|
||||
const EnvOptions& env_options,
|
||||
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
|
||||
bool sequential_mode, size_t readahead, bool record_read_stats,
|
||||
HistogramImpl* file_read_hist, std::unique_ptr<TableReader>* table_reader,
|
||||
bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
|
||||
std::unique_ptr<TableReader>* table_reader,
|
||||
const SliceTransform* prefix_extractor, bool skip_filters, int level,
|
||||
bool prefetch_index_and_filter_in_cache, bool for_compaction) {
|
||||
std::string fname =
|
||||
@ -107,13 +101,6 @@ Status TableCache::GetTableReader(
|
||||
|
||||
RecordTick(ioptions_.statistics, NO_FILE_OPENS);
|
||||
if (s.ok()) {
|
||||
if (readahead > 0 && !env_options.use_mmap_reads) {
|
||||
// Not compatible with mmap files since ReadaheadRandomAccessFile requires
|
||||
// its wrapped file's Read() to copy data into the provided scratch
|
||||
// buffer, which mmap files don't use.
|
||||
// TODO(ajkr): try madvise for mmap files in place of buffered readahead.
|
||||
file = NewReadaheadRandomAccessFile(std::move(file), readahead);
|
||||
}
|
||||
if (!sequential_mode && ioptions_.advise_random_on_open) {
|
||||
file->Hint(RandomAccessFile::RANDOM);
|
||||
}
|
||||
@ -164,10 +151,9 @@ Status TableCache::FindTable(const EnvOptions& env_options,
|
||||
}
|
||||
std::unique_ptr<TableReader> table_reader;
|
||||
s = GetTableReader(env_options, internal_comparator, fd,
|
||||
false /* sequential mode */, 0 /* readahead */,
|
||||
record_read_stats, file_read_hist, &table_reader,
|
||||
prefix_extractor, skip_filters, level,
|
||||
prefetch_index_and_filter_in_cache);
|
||||
false /* sequential mode */, record_read_stats,
|
||||
file_read_hist, &table_reader, prefix_extractor,
|
||||
skip_filters, level, prefetch_index_and_filter_in_cache);
|
||||
if (!s.ok()) {
|
||||
assert(table_reader == nullptr);
|
||||
RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
|
||||
@ -196,48 +182,21 @@ InternalIterator* TableCache::NewIterator(
|
||||
PERF_TIMER_GUARD(new_table_iterator_nanos);
|
||||
|
||||
Status s;
|
||||
bool create_new_table_reader = false;
|
||||
TableReader* table_reader = nullptr;
|
||||
Cache::Handle* handle = nullptr;
|
||||
if (table_reader_ptr != nullptr) {
|
||||
*table_reader_ptr = nullptr;
|
||||
}
|
||||
size_t readahead = 0;
|
||||
if (for_compaction) {
|
||||
#ifndef NDEBUG
|
||||
bool use_direct_reads_for_compaction = env_options.use_direct_reads;
|
||||
TEST_SYNC_POINT_CALLBACK("TableCache::NewIterator:for_compaction",
|
||||
&use_direct_reads_for_compaction);
|
||||
#endif // !NDEBUG
|
||||
if (ioptions_.new_table_reader_for_compaction_inputs) {
|
||||
// get compaction_readahead_size from env_options allows us to set the
|
||||
// value dynamically
|
||||
readahead = env_options.compaction_readahead_size;
|
||||
create_new_table_reader = true;
|
||||
}
|
||||
}
|
||||
|
||||
auto& fd = file_meta.fd;
|
||||
if (create_new_table_reader) {
|
||||
std::unique_ptr<TableReader> table_reader_unique_ptr;
|
||||
s = GetTableReader(
|
||||
env_options, icomparator, fd, true /* sequential_mode */, readahead,
|
||||
!for_compaction /* record stats */, nullptr, &table_reader_unique_ptr,
|
||||
prefix_extractor, false /* skip_filters */, level,
|
||||
true /* prefetch_index_and_filter_in_cache */, for_compaction);
|
||||
table_reader = fd.table_reader;
|
||||
if (table_reader == nullptr) {
|
||||
s = FindTable(env_options, icomparator, fd, &handle, prefix_extractor,
|
||||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
!for_compaction /* record read_stats */, file_read_hist,
|
||||
skip_filters, level);
|
||||
if (s.ok()) {
|
||||
table_reader = table_reader_unique_ptr.release();
|
||||
}
|
||||
} else {
|
||||
table_reader = fd.table_reader;
|
||||
if (table_reader == nullptr) {
|
||||
s = FindTable(env_options, icomparator, fd, &handle, prefix_extractor,
|
||||
options.read_tier == kBlockCacheTier /* no_io */,
|
||||
!for_compaction /* record read_stats */, file_read_hist,
|
||||
skip_filters, level);
|
||||
if (s.ok()) {
|
||||
table_reader = GetTableReaderFromHandle(handle);
|
||||
}
|
||||
table_reader = GetTableReaderFromHandle(handle);
|
||||
}
|
||||
}
|
||||
InternalIterator* result = nullptr;
|
||||
@ -247,13 +206,10 @@ InternalIterator* TableCache::NewIterator(
|
||||
result = NewEmptyInternalIterator<Slice>(arena);
|
||||
} else {
|
||||
result = table_reader->NewIterator(options, prefix_extractor, arena,
|
||||
skip_filters, for_compaction);
|
||||
skip_filters, for_compaction,
|
||||
env_options.compaction_readahead_size);
|
||||
}
|
||||
if (create_new_table_reader) {
|
||||
assert(handle == nullptr);
|
||||
result->RegisterCleanup(&DeleteTableReader, table_reader,
|
||||
ioptions_.statistics);
|
||||
} else if (handle != nullptr) {
|
||||
if (handle != nullptr) {
|
||||
result->RegisterCleanup(&UnrefEntry, cache_, handle);
|
||||
handle = nullptr; // prevent from releasing below
|
||||
}
|
||||
|
@ -177,8 +177,7 @@ class TableCache {
|
||||
Status GetTableReader(const EnvOptions& env_options,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const FileDescriptor& fd, bool sequential_mode,
|
||||
size_t readahead, bool record_read_stats,
|
||||
HistogramImpl* file_read_hist,
|
||||
bool record_read_stats, HistogramImpl* file_read_hist,
|
||||
std::unique_ptr<TableReader>* table_reader,
|
||||
const SliceTransform* prefix_extractor = nullptr,
|
||||
bool skip_filters = false, int level = -1,
|
||||
|
@ -760,6 +760,8 @@ struct DBOptions {
|
||||
// for this mode if using block-based table.
|
||||
//
|
||||
// Default: false
|
||||
// This flag has no affect on the behavior of compaction and plan to delete
|
||||
// in the future.
|
||||
bool new_table_reader_for_compaction_inputs = false;
|
||||
|
||||
// If non-zero, we perform bigger reads when doing compaction. If you're
|
||||
|
@ -83,12 +83,13 @@ Status ReadBlockFromFile(
|
||||
bool do_uncompress, bool maybe_compressed, BlockType block_type,
|
||||
const UncompressionDict& uncompression_dict,
|
||||
const PersistentCacheOptions& cache_options, SequenceNumber global_seqno,
|
||||
size_t read_amp_bytes_per_bit, MemoryAllocator* memory_allocator) {
|
||||
size_t read_amp_bytes_per_bit, MemoryAllocator* memory_allocator,
|
||||
bool for_compaction = false) {
|
||||
BlockContents contents;
|
||||
BlockFetcher block_fetcher(file, prefetch_buffer, footer, options, handle,
|
||||
&contents, ioptions, do_uncompress,
|
||||
maybe_compressed, block_type, uncompression_dict,
|
||||
cache_options, memory_allocator);
|
||||
BlockFetcher block_fetcher(
|
||||
file, prefetch_buffer, footer, options, handle, &contents, ioptions,
|
||||
do_uncompress, maybe_compressed, block_type, uncompression_dict,
|
||||
cache_options, memory_allocator, nullptr, for_compaction);
|
||||
Status s = block_fetcher.ReadBlockContents();
|
||||
if (s.ok()) {
|
||||
result->reset(new Block(std::move(contents), global_seqno,
|
||||
@ -1906,7 +1907,7 @@ CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
|
||||
if (!is_a_filter_partition && rep_->filter_entry.IsCached()) {
|
||||
return {rep_->filter_entry.GetValue(), /*cache=*/nullptr,
|
||||
/*cache_handle=*/nullptr, /*own_value=*/false};
|
||||
/*cache_handle=*/nullptr, /*own_value=*/false};
|
||||
}
|
||||
|
||||
PERF_TIMER_GUARD(read_filter_block_nanos);
|
||||
@ -2075,7 +2076,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
|
||||
const ReadOptions& ro, const BlockHandle& handle, TBlockIter* input_iter,
|
||||
BlockType block_type, bool key_includes_seq, bool index_key_is_full,
|
||||
GetContext* get_context, BlockCacheLookupContext* lookup_context, Status s,
|
||||
FilePrefetchBuffer* prefetch_buffer) const {
|
||||
FilePrefetchBuffer* prefetch_buffer, bool for_compaction) const {
|
||||
PERF_TIMER_GUARD(new_table_block_iter_nanos);
|
||||
|
||||
TBlockIter* iter = input_iter != nullptr ? input_iter : new TBlockIter;
|
||||
@ -2094,7 +2095,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
|
||||
|
||||
CachableEntry<Block> block;
|
||||
s = RetrieveBlock(prefetch_buffer, ro, handle, uncompression_dict, &block,
|
||||
block_type, get_context, lookup_context);
|
||||
block_type, get_context, lookup_context, for_compaction);
|
||||
|
||||
if (!s.ok()) {
|
||||
assert(block.IsEmpty());
|
||||
@ -2144,6 +2145,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
|
||||
s = block_cache->Insert(unique_key, nullptr,
|
||||
block.GetValue()->ApproximateMemoryUsage(),
|
||||
nullptr, &cache_handle);
|
||||
|
||||
if (s.ok()) {
|
||||
assert(cache_handle != nullptr);
|
||||
iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache,
|
||||
@ -2297,7 +2299,8 @@ Status BlockBasedTable::RetrieveBlock(
|
||||
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
|
||||
const BlockHandle& handle, const UncompressionDict& uncompression_dict,
|
||||
CachableEntry<Block>* block_entry, BlockType block_type,
|
||||
GetContext* get_context, BlockCacheLookupContext* lookup_context) const {
|
||||
GetContext* get_context, BlockCacheLookupContext* lookup_context,
|
||||
bool for_compaction) const {
|
||||
assert(block_entry);
|
||||
assert(block_entry->IsEmpty());
|
||||
|
||||
@ -2340,7 +2343,7 @@ Status BlockBasedTable::RetrieveBlock(
|
||||
block_type == BlockType::kData
|
||||
? rep_->table_options.read_amp_bytes_per_bit
|
||||
: 0,
|
||||
GetMemoryAllocator(rep_->table_options));
|
||||
GetMemoryAllocator(rep_->table_options), for_compaction);
|
||||
}
|
||||
|
||||
if (!s.ok()) {
|
||||
@ -2714,13 +2717,18 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
|
||||
rep->file.get(), read_options_.readahead_size,
|
||||
read_options_.readahead_size));
|
||||
}
|
||||
} else if (!prefetch_buffer_) {
|
||||
prefetch_buffer_.reset(
|
||||
new FilePrefetchBuffer(rep->file.get(), compaction_readahead_size_,
|
||||
compaction_readahead_size_));
|
||||
}
|
||||
|
||||
Status s;
|
||||
table_->NewDataBlockIterator<TBlockIter>(
|
||||
read_options_, data_block_handle, &block_iter_, block_type_,
|
||||
key_includes_seq_, index_key_is_full_,
|
||||
/*get_context=*/nullptr, &lookup_context_, s, prefetch_buffer_.get());
|
||||
/*get_context=*/nullptr, &lookup_context_, s, prefetch_buffer_.get(),
|
||||
for_compaction_);
|
||||
block_iter_points_to_real_block_ = true;
|
||||
}
|
||||
}
|
||||
@ -2806,7 +2814,8 @@ void BlockBasedTableIterator<TBlockIter, TValue>::CheckOutOfBound() {
|
||||
|
||||
InternalIterator* BlockBasedTable::NewIterator(
|
||||
const ReadOptions& read_options, const SliceTransform* prefix_extractor,
|
||||
Arena* arena, bool skip_filters, bool for_compaction) {
|
||||
Arena* arena, bool skip_filters, bool for_compaction,
|
||||
size_t compaction_readahead_size) {
|
||||
BlockCacheLookupContext lookup_context{
|
||||
for_compaction ? BlockCacheLookupCaller::kCompaction
|
||||
: BlockCacheLookupCaller::kUserIterator};
|
||||
@ -2823,7 +2832,8 @@ InternalIterator* BlockBasedTable::NewIterator(
|
||||
!skip_filters && !read_options.total_order_seek &&
|
||||
prefix_extractor != nullptr,
|
||||
need_upper_bound_check, prefix_extractor, BlockType::kData,
|
||||
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction);
|
||||
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction,
|
||||
compaction_readahead_size);
|
||||
} else {
|
||||
auto* mem =
|
||||
arena->AllocateAligned(sizeof(BlockBasedTableIterator<DataBlockIter>));
|
||||
@ -2835,7 +2845,8 @@ InternalIterator* BlockBasedTable::NewIterator(
|
||||
!skip_filters && !read_options.total_order_seek &&
|
||||
prefix_extractor != nullptr,
|
||||
need_upper_bound_check, prefix_extractor, BlockType::kData,
|
||||
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction);
|
||||
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction,
|
||||
compaction_readahead_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -123,6 +123,8 @@ class BlockBasedTable : public TableReader {
|
||||
// The result of NewIterator() is initially invalid (caller must
|
||||
// call one of the Seek methods on the iterator before using it).
|
||||
// @param skip_filters Disables loading/accessing the filter block
|
||||
// compaction_readahead_size: its value will only be used if for_compaction =
|
||||
// true
|
||||
InternalIterator* NewIterator(
|
||||
const ReadOptions&, const SliceTransform* prefix_extractor,
|
||||
Arena* arena = nullptr, bool skip_filters = false,
|
||||
@ -131,7 +133,8 @@ class BlockBasedTable : public TableReader {
|
||||
// i.e., it will populate the block cache with blocks in the new SST
|
||||
// files. We treat those as a user is calling iterator for now. We should
|
||||
// differentiate the callers.
|
||||
bool for_compaction = false) override;
|
||||
bool for_compaction = false,
|
||||
size_t compaction_readahead_size = 0) override;
|
||||
|
||||
FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
|
||||
const ReadOptions& read_options) override;
|
||||
@ -234,7 +237,7 @@ class BlockBasedTable : public TableReader {
|
||||
TBlockIter* input_iter, BlockType block_type, bool key_includes_seq,
|
||||
bool index_key_is_full, GetContext* get_context,
|
||||
BlockCacheLookupContext* lookup_context, Status s,
|
||||
FilePrefetchBuffer* prefetch_buffer) const;
|
||||
FilePrefetchBuffer* prefetch_buffer, bool for_compaction = false) const;
|
||||
|
||||
class PartitionedIndexIteratorState;
|
||||
|
||||
@ -283,7 +286,8 @@ class BlockBasedTable : public TableReader {
|
||||
const UncompressionDict& uncompression_dict,
|
||||
CachableEntry<Block>* block_entry, BlockType block_type,
|
||||
GetContext* get_context,
|
||||
BlockCacheLookupContext* lookup_context) const;
|
||||
BlockCacheLookupContext* lookup_context,
|
||||
bool for_compaction = false) const;
|
||||
|
||||
// For the following two functions:
|
||||
// if `no_io == true`, we will not try to read filter/index from sst file
|
||||
@ -596,6 +600,8 @@ struct BlockBasedTable::Rep {
|
||||
// Iterates over the contents of BlockBasedTable.
|
||||
template <class TBlockIter, typename TValue = Slice>
|
||||
class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
|
||||
// compaction_readahead_size: its value will only be used if for_compaction =
|
||||
// true
|
||||
public:
|
||||
BlockBasedTableIterator(const BlockBasedTable* table,
|
||||
const ReadOptions& read_options,
|
||||
@ -605,7 +611,8 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
|
||||
const SliceTransform* prefix_extractor,
|
||||
BlockType block_type, bool key_includes_seq = true,
|
||||
bool index_key_is_full = true,
|
||||
bool for_compaction = false)
|
||||
bool for_compaction = false,
|
||||
size_t compaction_readahead_size = 0)
|
||||
: InternalIteratorBase<TValue>(false),
|
||||
table_(table),
|
||||
read_options_(read_options),
|
||||
@ -621,6 +628,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
|
||||
key_includes_seq_(key_includes_seq),
|
||||
index_key_is_full_(index_key_is_full),
|
||||
for_compaction_(for_compaction),
|
||||
compaction_readahead_size_(compaction_readahead_size),
|
||||
lookup_context_(for_compaction
|
||||
? BlockCacheLookupCaller::kCompaction
|
||||
: BlockCacheLookupCaller::kUserIterator) {}
|
||||
@ -734,6 +742,9 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
|
||||
bool index_key_is_full_;
|
||||
// If this iterator is created for compaction
|
||||
bool for_compaction_;
|
||||
// Readahead size used in compaction, its value is used only if
|
||||
// for_compaction_ = true
|
||||
size_t compaction_readahead_size_;
|
||||
BlockHandle prev_index_value_;
|
||||
BlockCacheLookupContext lookup_context_;
|
||||
|
||||
|
@ -93,7 +93,8 @@ inline bool BlockFetcher::TryGetFromPrefetchBuffer() {
|
||||
if (prefetch_buffer_ != nullptr &&
|
||||
prefetch_buffer_->TryReadFromCache(
|
||||
handle_.offset(),
|
||||
static_cast<size_t>(handle_.size()) + kBlockTrailerSize, &slice_)) {
|
||||
static_cast<size_t>(handle_.size()) + kBlockTrailerSize, &slice_,
|
||||
for_compaction_)) {
|
||||
block_size_ = static_cast<size_t>(handle_.size());
|
||||
CheckBlockChecksum();
|
||||
if (!status_.ok()) {
|
||||
@ -217,7 +218,7 @@ Status BlockFetcher::ReadBlockContents() {
|
||||
PERF_TIMER_GUARD(block_read_time);
|
||||
// Actual file read
|
||||
status_ = file_->Read(handle_.offset(), block_size_ + kBlockTrailerSize,
|
||||
&slice_, used_buf_);
|
||||
&slice_, used_buf_, for_compaction_);
|
||||
}
|
||||
PERF_COUNTER_ADD(block_read_count, 1);
|
||||
|
||||
|
@ -44,7 +44,8 @@ class BlockFetcher {
|
||||
const UncompressionDict& uncompression_dict,
|
||||
const PersistentCacheOptions& cache_options,
|
||||
MemoryAllocator* memory_allocator = nullptr,
|
||||
MemoryAllocator* memory_allocator_compressed = nullptr)
|
||||
MemoryAllocator* memory_allocator_compressed = nullptr,
|
||||
bool for_compaction = false)
|
||||
: file_(file),
|
||||
prefetch_buffer_(prefetch_buffer),
|
||||
footer_(footer),
|
||||
@ -58,7 +59,9 @@ class BlockFetcher {
|
||||
uncompression_dict_(uncompression_dict),
|
||||
cache_options_(cache_options),
|
||||
memory_allocator_(memory_allocator),
|
||||
memory_allocator_compressed_(memory_allocator_compressed) {}
|
||||
memory_allocator_compressed_(memory_allocator_compressed),
|
||||
for_compaction_(for_compaction) {}
|
||||
|
||||
Status ReadBlockContents();
|
||||
CompressionType get_compression_type() const { return compression_type_; }
|
||||
|
||||
@ -88,6 +91,7 @@ class BlockFetcher {
|
||||
char stack_buf_[kDefaultStackBufferSize];
|
||||
bool got_from_prefetch_buffer_ = false;
|
||||
rocksdb::CompressionType compression_type_;
|
||||
bool for_compaction_ = false;
|
||||
|
||||
// return true if found
|
||||
bool TryGetUncompressBlockFromPersistentCache();
|
||||
|
@ -377,7 +377,8 @@ Slice CuckooTableIterator::value() const {
|
||||
InternalIterator* CuckooTableReader::NewIterator(
|
||||
const ReadOptions& /*read_options*/,
|
||||
const SliceTransform* /* prefix_extractor */, Arena* arena,
|
||||
bool /*skip_filters*/, bool /*for_compaction*/) {
|
||||
bool /*skip_filters*/, bool /*for_compaction*/,
|
||||
size_t /*compaction_readahead_size*/) {
|
||||
if (!status().ok()) {
|
||||
return NewErrorInternalIterator<Slice>(
|
||||
Status::Corruption("CuckooTableReader status is not okay."), arena);
|
||||
|
@ -45,11 +45,15 @@ class CuckooTableReader: public TableReader {
|
||||
GetContext* get_context, const SliceTransform* prefix_extractor,
|
||||
bool skip_filters = false) override;
|
||||
|
||||
// Returns a new iterator over table contents
|
||||
// compaction_readahead_size: its value will only be used if for_compaction =
|
||||
// true
|
||||
InternalIterator* NewIterator(const ReadOptions&,
|
||||
const SliceTransform* prefix_extractor,
|
||||
Arena* arena = nullptr,
|
||||
bool skip_filters = false,
|
||||
bool for_compaction = false) override;
|
||||
bool for_compaction = false,
|
||||
size_t compaction_readahead_size = 0) override;
|
||||
void Prepare(const Slice& target) override;
|
||||
|
||||
// Report an approximation of how much memory has been used.
|
||||
|
@ -487,12 +487,11 @@ Status ReadMetaBlock(RandomAccessFileReader* file,
|
||||
read_options.verify_checksums = false;
|
||||
PersistentCacheOptions cache_options;
|
||||
|
||||
BlockFetcher block_fetcher(file, prefetch_buffer, footer, read_options,
|
||||
metaindex_handle, &metaindex_contents, ioptions,
|
||||
false /* decompress */, false /*maybe_compressed*/,
|
||||
BlockType::kMetaIndex,
|
||||
UncompressionDict::GetEmptyDict(), cache_options,
|
||||
memory_allocator);
|
||||
BlockFetcher block_fetcher(
|
||||
file, prefetch_buffer, footer, read_options, metaindex_handle,
|
||||
&metaindex_contents, ioptions, false /* decompress */,
|
||||
false /*maybe_compressed*/, BlockType::kMetaIndex,
|
||||
UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
|
||||
status = block_fetcher.ReadBlockContents();
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
|
@ -34,7 +34,8 @@ stl_wrappers::KVMap MakeMockFile(
|
||||
|
||||
InternalIterator* MockTableReader::NewIterator(
|
||||
const ReadOptions&, const SliceTransform* /* prefix_extractor */,
|
||||
Arena* /*arena*/, bool /*skip_filters*/, bool /*for_compaction*/) {
|
||||
Arena* /*arena*/, bool /*skip_filters*/, bool /*for_compaction*/,
|
||||
size_t /*compaction_readahead_size*/) {
|
||||
return new MockTableIterator(table_);
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,8 @@ class MockTableReader : public TableReader {
|
||||
const SliceTransform* prefix_extractor,
|
||||
Arena* arena = nullptr,
|
||||
bool skip_filters = false,
|
||||
bool for_compaction = false) override;
|
||||
bool for_compaction = false,
|
||||
size_t compaction_readahead_size = 0) override;
|
||||
|
||||
Status Get(const ReadOptions& readOptions, const Slice& key,
|
||||
GetContext* get_context, const SliceTransform* prefix_extractor,
|
||||
|
@ -196,7 +196,8 @@ void PlainTableReader::SetupForCompaction() {
|
||||
|
||||
InternalIterator* PlainTableReader::NewIterator(
|
||||
const ReadOptions& options, const SliceTransform* /* prefix_extractor */,
|
||||
Arena* arena, bool /*skip_filters*/, bool /*for_compaction*/) {
|
||||
Arena* arena, bool /*skip_filters*/, bool /*for_compaction*/,
|
||||
size_t /*compaction_readahead_size*/) {
|
||||
bool use_prefix_seek = !IsTotalOrderMode() && !options.total_order_seek;
|
||||
if (arena == nullptr) {
|
||||
return new PlainTableIterator(this, use_prefix_seek);
|
||||
|
@ -77,11 +77,15 @@ class PlainTableReader: public TableReader {
|
||||
bool full_scan_mode, const bool immortal_table = false,
|
||||
const SliceTransform* prefix_extractor = nullptr);
|
||||
|
||||
// Returns new iterator over table contents
|
||||
// compaction_readahead_size: its value will only be used if for_compaction =
|
||||
// true
|
||||
InternalIterator* NewIterator(const ReadOptions&,
|
||||
const SliceTransform* prefix_extractor,
|
||||
Arena* arena = nullptr,
|
||||
bool skip_filters = false,
|
||||
bool for_compaction = false) override;
|
||||
bool for_compaction = false,
|
||||
size_t compaction_readahead_size = 0) override;
|
||||
|
||||
void Prepare(const Slice& target) override;
|
||||
|
||||
|
@ -44,11 +44,12 @@ class TableReader {
|
||||
// all the states but those allocated in arena.
|
||||
// skip_filters: disables checking the bloom filters even if they exist. This
|
||||
// option is effective only for block-based table format.
|
||||
virtual InternalIterator* NewIterator(const ReadOptions&,
|
||||
const SliceTransform* prefix_extractor,
|
||||
Arena* arena = nullptr,
|
||||
bool skip_filters = false,
|
||||
bool for_compaction = false) = 0;
|
||||
// compaction_readahead_size: its value will only be used if for_compaction =
|
||||
// true
|
||||
virtual InternalIterator* NewIterator(
|
||||
const ReadOptions&, const SliceTransform* prefix_extractor,
|
||||
Arena* arena = nullptr, bool skip_filters = false,
|
||||
bool for_compaction = false, size_t compaction_readahead_size = 0) = 0;
|
||||
|
||||
virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
|
||||
const ReadOptions& /*read_options*/) {
|
||||
|
@ -3590,7 +3590,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
|
||||
BlockContents metaindex_contents;
|
||||
|
||||
BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex,
|
||||
&metaindex_contents);
|
||||
&metaindex_contents);
|
||||
Block metaindex_block(std::move(metaindex_contents),
|
||||
kDisableGlobalSequenceNumber);
|
||||
|
||||
@ -3608,7 +3608,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
|
||||
BlockContents properties_contents;
|
||||
|
||||
BlockFetchHelper(properties_handle, BlockType::kProperties,
|
||||
&properties_contents);
|
||||
&properties_contents);
|
||||
Block properties_block(std::move(properties_contents),
|
||||
kDisableGlobalSequenceNumber);
|
||||
|
||||
|
@ -70,7 +70,7 @@ Status SequentialFileReader::Skip(uint64_t n) {
|
||||
}
|
||||
|
||||
Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const {
|
||||
char* scratch, bool for_compaction) const {
|
||||
Status s;
|
||||
uint64_t elapsed = 0;
|
||||
{
|
||||
@ -90,7 +90,7 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
|
||||
buf.AllocateNewBuffer(read_size);
|
||||
while (buf.CurrentSize() < read_size) {
|
||||
size_t allowed;
|
||||
if (for_compaction_ && rate_limiter_ != nullptr) {
|
||||
if (for_compaction && rate_limiter_ != nullptr) {
|
||||
allowed = rate_limiter_->RequestToken(
|
||||
buf.Capacity() - buf.CurrentSize(), buf.Alignment(),
|
||||
Env::IOPriority::IO_LOW, stats_, RateLimiter::OpType::kRead);
|
||||
@ -134,7 +134,7 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
|
||||
const char* res_scratch = nullptr;
|
||||
while (pos < n) {
|
||||
size_t allowed;
|
||||
if (for_compaction_ && rate_limiter_ != nullptr) {
|
||||
if (for_compaction && rate_limiter_ != nullptr) {
|
||||
if (rate_limiter_->IsRateLimited(RateLimiter::OpType::kRead)) {
|
||||
sw.DelayStart();
|
||||
}
|
||||
@ -711,7 +711,8 @@ private:
|
||||
} // namespace
|
||||
|
||||
Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
|
||||
uint64_t offset, size_t n) {
|
||||
uint64_t offset, size_t n,
|
||||
bool for_compaction) {
|
||||
size_t alignment = reader->file()->GetRequiredBufferAlignment();
|
||||
size_t offset_ = static_cast<size_t>(offset);
|
||||
uint64_t rounddown_offset = Rounddown(offset_, alignment);
|
||||
@ -771,7 +772,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
|
||||
Slice result;
|
||||
s = reader->Read(rounddown_offset + chunk_len,
|
||||
static_cast<size_t>(roundup_len - chunk_len), &result,
|
||||
buffer_.BufferStart() + chunk_len);
|
||||
buffer_.BufferStart() + chunk_len, for_compaction);
|
||||
if (s.ok()) {
|
||||
buffer_offset_ = rounddown_offset;
|
||||
buffer_.Size(static_cast<size_t>(chunk_len) + result.size());
|
||||
@ -780,7 +781,7 @@ Status FilePrefetchBuffer::Prefetch(RandomAccessFileReader* reader,
|
||||
}
|
||||
|
||||
bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n,
|
||||
Slice* result) {
|
||||
Slice* result, bool for_compaction) {
|
||||
if (track_min_offset_ && offset < min_offset_read_) {
|
||||
min_offset_read_ = static_cast<size_t>(offset);
|
||||
}
|
||||
@ -797,7 +798,8 @@ bool FilePrefetchBuffer::TryReadFromCache(uint64_t offset, size_t n,
|
||||
assert(file_reader_ != nullptr);
|
||||
assert(max_readahead_size_ >= readahead_size_);
|
||||
|
||||
Status s = Prefetch(file_reader_, offset, n + readahead_size_);
|
||||
Status s =
|
||||
Prefetch(file_reader_, offset, n + readahead_size_, for_compaction);
|
||||
if (!s.ok()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -158,7 +158,8 @@ class RandomAccessFileReader {
|
||||
RandomAccessFileReader(const RandomAccessFileReader&) = delete;
|
||||
RandomAccessFileReader& operator=(const RandomAccessFileReader&) = delete;
|
||||
|
||||
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
|
||||
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch,
|
||||
bool for_compaction = false) const;
|
||||
|
||||
Status Prefetch(uint64_t offset, size_t n) const {
|
||||
return file_->Prefetch(offset, n);
|
||||
@ -343,7 +344,9 @@ class FilePrefetchBuffer {
|
||||
// reader : the file reader.
|
||||
// offset : the file offset to start reading from.
|
||||
// n : the number of bytes to read.
|
||||
Status Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n);
|
||||
// for_compaction : if prefetch is done for compaction read.
|
||||
Status Prefetch(RandomAccessFileReader* reader, uint64_t offset, size_t n,
|
||||
bool for_compaction = false);
|
||||
|
||||
// Tries returning the data for a file raed from this buffer, if that data is
|
||||
// in the buffer.
|
||||
@ -354,7 +357,9 @@ class FilePrefetchBuffer {
|
||||
// offset : the file offset.
|
||||
// n : the number of bytes.
|
||||
// result : output buffer to put the data into.
|
||||
bool TryReadFromCache(uint64_t offset, size_t n, Slice* result);
|
||||
// for_compaction : if cache read is done for compaction read.
|
||||
bool TryReadFromCache(uint64_t offset, size_t n, Slice* result,
|
||||
bool for_compaction = false);
|
||||
|
||||
// The minimum `offset` ever passed to TryReadFromCache(). This will nly be
|
||||
// tracked if track_min_offset = true.
|
||||
|
Loading…
x
Reference in New Issue
Block a user