dynamically change current memtable size
Summary: Previously, setting `write_buffer_size` with `SetOptions` would only apply to new memtables. An internal user wanted it to take effect immediately, instead of at an arbitrary future point, to prevent OOM. This PR makes the memtable's size mutable, and makes `SetOptions()` mutate it. There is one case in which we preserve the old behavior: when the memtable prefix bloom filter is enabled and the user is increasing the memtable's capacity. That's because the prefix bloom filter's size is fixed and wouldn't work as well on a larger memtable. Closes https://github.com/facebook/rocksdb/pull/3119 Differential Revision: D6228304 Pulled By: ajkr fbshipit-source-id: e44bd9d10a5f8c9d8c464bf7436070bb3eafdfc9
This commit is contained in:
parent
30e4e01e05
commit
c4c1f961e7
@ -949,6 +949,10 @@ void ColumnFamilyData::InstallSuperVersion(
|
||||
RecalculateWriteStallConditions(mutable_cf_options);
|
||||
|
||||
if (old_superversion != nullptr) {
|
||||
if (old_superversion->mutable_cf_options.write_buffer_size !=
|
||||
mutable_cf_options.write_buffer_size) {
|
||||
mem_->UpdateWriteBufferSize(mutable_cf_options.write_buffer_size);
|
||||
}
|
||||
if (old_superversion->write_stall_condition !=
|
||||
new_superversion->write_stall_condition) {
|
||||
sv_context->PushWriteStallNotification(
|
||||
|
@ -3354,11 +3354,23 @@ TEST_F(DBTest, DynamicMemtableOptions) {
|
||||
{"write_buffer_size", "131072"},
|
||||
}));
|
||||
|
||||
// The existing memtable is still 64KB in size, after it becomes immutable,
|
||||
// the next memtable will be 128KB in size. Write 256KB total, we should
|
||||
// have a 64KB L0 file, a 128KB L0 file, and a memtable with 64KB data
|
||||
gen_l0_kb(256);
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 2); // (A)
|
||||
// The existing memtable inflated 64KB->128KB when we invoked SetOptions().
|
||||
// Write 192KB, we should have a 128KB L0 file and a memtable with 64KB data.
|
||||
gen_l0_kb(192);
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 1); // (A)
|
||||
ASSERT_LT(SizeAtLevel(0), k128KB + 2 * k5KB);
|
||||
ASSERT_GT(SizeAtLevel(0), k128KB - 4 * k5KB);
|
||||
|
||||
// Decrease buffer size below current usage
|
||||
ASSERT_OK(dbfull()->SetOptions({
|
||||
{"write_buffer_size", "65536"},
|
||||
}));
|
||||
// The existing memtable became eligible for flush when we reduced its
|
||||
// capacity to 64KB. Two keys need to be added to trigger flush: first causes
|
||||
// memtable to be marked full, second schedules the flush. Then we should have
|
||||
// a 128KB L0 file, a 64KB L0 file, and a memtable with just one key.
|
||||
gen_l0_kb(2);
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 2);
|
||||
ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB);
|
||||
ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 4 * k5KB);
|
||||
|
||||
|
@ -39,10 +39,10 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
MemTableOptions::MemTableOptions(const ImmutableCFOptions& ioptions,
|
||||
ImmutableMemTableOptions::ImmutableMemTableOptions(
|
||||
const ImmutableCFOptions& ioptions,
|
||||
const MutableCFOptions& mutable_cf_options)
|
||||
: write_buffer_size(mutable_cf_options.write_buffer_size),
|
||||
arena_block_size(mutable_cf_options.arena_block_size),
|
||||
: arena_block_size(mutable_cf_options.arena_block_size),
|
||||
memtable_prefix_bloom_bits(
|
||||
static_cast<uint32_t>(
|
||||
static_cast<double>(mutable_cf_options.write_buffer_size) *
|
||||
@ -83,6 +83,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
|
||||
data_size_(0),
|
||||
num_entries_(0),
|
||||
num_deletes_(0),
|
||||
write_buffer_size_(mutable_cf_options.write_buffer_size),
|
||||
flush_in_progress_(false),
|
||||
flush_completed_(false),
|
||||
file_number_(0),
|
||||
@ -136,6 +137,7 @@ size_t MemTable::ApproximateMemoryUsage() {
|
||||
}
|
||||
|
||||
bool MemTable::ShouldFlushNow() const {
|
||||
size_t write_buffer_size = write_buffer_size_.load(std::memory_order_relaxed);
|
||||
// In many cases, we cannot allocate arena blocks that exactly match the
|
||||
// buffer size. Thus we have to decide if we should over-allocate or
|
||||
// under-allocate.
|
||||
@ -153,16 +155,14 @@ bool MemTable::ShouldFlushNow() const {
|
||||
// if we can still allocate one more block without exceeding the
|
||||
// over-allocation ratio, then we should not flush.
|
||||
if (allocated_memory + kArenaBlockSize <
|
||||
moptions_.write_buffer_size +
|
||||
kArenaBlockSize * kAllowOverAllocationRatio) {
|
||||
write_buffer_size + kArenaBlockSize * kAllowOverAllocationRatio) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// if user keeps adding entries that exceeds moptions.write_buffer_size,
|
||||
// we need to flush earlier even though we still have much available
|
||||
// memory left.
|
||||
if (allocated_memory > moptions_.write_buffer_size +
|
||||
kArenaBlockSize * kAllowOverAllocationRatio) {
|
||||
// if user keeps adding entries that exceed write_buffer_size, we need to
|
||||
// flush earlier even though we still have much available memory left.
|
||||
if (allocated_memory >
|
||||
write_buffer_size + kArenaBlockSize * kAllowOverAllocationRatio) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -265,7 +265,8 @@ class MemTableIterator : public InternalIterator {
|
||||
comparator_(mem.comparator_),
|
||||
valid_(false),
|
||||
arena_mode_(arena != nullptr),
|
||||
value_pinned_(!mem.GetMemTableOptions()->inplace_update_support) {
|
||||
value_pinned_(
|
||||
!mem.GetImmutableMemTableOptions()->inplace_update_support) {
|
||||
if (use_range_del_table) {
|
||||
iter_ = mem.range_del_table_->GetIterator(arena);
|
||||
} else if (prefix_extractor_ != nullptr && !read_options.total_order_seek) {
|
||||
|
@ -36,11 +36,9 @@ class MemTableIterator;
|
||||
class MergeContext;
|
||||
class InternalIterator;
|
||||
|
||||
struct MemTableOptions {
|
||||
explicit MemTableOptions(
|
||||
const ImmutableCFOptions& ioptions,
|
||||
struct ImmutableMemTableOptions {
|
||||
explicit ImmutableMemTableOptions(const ImmutableCFOptions& ioptions,
|
||||
const MutableCFOptions& mutable_cf_options);
|
||||
size_t write_buffer_size;
|
||||
size_t arena_block_size;
|
||||
uint32_t memtable_prefix_bloom_bits;
|
||||
size_t memtable_huge_page_size;
|
||||
@ -262,6 +260,18 @@ class MemTable {
|
||||
return num_deletes_.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Dynamically change the memtable's capacity. If set below the current usage,
|
||||
// the next key added will trigger a flush. Can only increase size when
|
||||
// memtable prefix bloom is disabled, since we can't easily allocate more
|
||||
// space.
//
// Precisely: the new value is stored when there is no prefix bloom filter
// (prefix_bloom_ == nullptr), or when it is strictly smaller than the current
// write_buffer_size_. Otherwise (bloom filter present and the new value is
// greater than or equal to the current one) the call leaves the capacity
// unchanged.
//
// new_write_buffer_size: the requested capacity.
|
||||
void UpdateWriteBufferSize(size_t new_write_buffer_size) {
|
||||
// Shrinking is always accepted; growing (or keeping the same value) is only
// accepted without a prefix bloom filter. The comparison reads the atomic
// through its implicit conversion.
if (prefix_bloom_ == nullptr ||
|
||||
new_write_buffer_size < write_buffer_size_) {
|
||||
// The store uses relaxed memory ordering.
write_buffer_size_.store(new_write_buffer_size,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the edits area that is needed for flushing the memtable
|
||||
VersionEdit* GetEdits() { return &edit_; }
|
||||
|
||||
@ -350,7 +360,9 @@ class MemTable {
|
||||
return comparator_.comparator;
|
||||
}
|
||||
|
||||
const MemTableOptions* GetMemTableOptions() const { return &moptions_; }
|
||||
// Returns a pointer to this memtable's moptions_ member. The dynamically
// changeable write buffer size is not part of these options; it is held in
// the separate atomic member write_buffer_size_.
const ImmutableMemTableOptions* GetImmutableMemTableOptions() const {
|
||||
return &moptions_;
|
||||
}
|
||||
|
||||
uint64_t ApproximateOldestKeyTime() const {
|
||||
return oldest_key_time_.load(std::memory_order_relaxed);
|
||||
@ -364,7 +376,7 @@ class MemTable {
|
||||
friend class MemTableList;
|
||||
|
||||
KeyComparator comparator_;
|
||||
const MemTableOptions moptions_;
|
||||
const ImmutableMemTableOptions moptions_;
|
||||
int refs_;
|
||||
const size_t kArenaBlockSize;
|
||||
AllocTracker mem_tracker_;
|
||||
@ -378,6 +390,9 @@ class MemTable {
|
||||
std::atomic<uint64_t> num_entries_;
|
||||
std::atomic<uint64_t> num_deletes_;
|
||||
|
||||
// Dynamically changeable memtable option
|
||||
std::atomic<size_t> write_buffer_size_;
|
||||
|
||||
// These are used to manage memtable flushes to storage
|
||||
bool flush_in_progress_; // started the flush
|
||||
bool flush_completed_; // finished the flush
|
||||
|
@ -1035,7 +1035,7 @@ class MemTableInserter : public WriteBatch::Handler {
|
||||
}
|
||||
|
||||
MemTable* mem = cf_mems_->GetMemTable();
|
||||
auto* moptions = mem->GetMemTableOptions();
|
||||
auto* moptions = mem->GetImmutableMemTableOptions();
|
||||
if (!moptions->inplace_update_support) {
|
||||
mem->Add(sequence_, value_type, key, value, concurrent_memtable_writes_,
|
||||
get_post_process_info(mem));
|
||||
@ -1196,7 +1196,7 @@ class MemTableInserter : public WriteBatch::Handler {
|
||||
}
|
||||
|
||||
MemTable* mem = cf_mems_->GetMemTable();
|
||||
auto* moptions = mem->GetMemTableOptions();
|
||||
auto* moptions = mem->GetImmutableMemTableOptions();
|
||||
bool perform_merge = false;
|
||||
|
||||
// If we pass DB through and options.max_successive_merges is hit
|
||||
|
Loading…
x
Reference in New Issue
Block a user