From 77c7085594a4a09efc2dc0a9154f31f6dae5e551 Mon Sep 17 00:00:00 2001 From: lgqss Date: Thu, 2 Dec 2021 11:44:29 -0800 Subject: [PATCH] MemTableList::TrimHistory now use allocated bytes (#9020) Summary: Fix a bug when both max_write_buffer_size_to_maintain and max_write_buffer_number_to_maintain are 0. The bug was introduced in 6.5.0 and https://github.com/facebook/rocksdb/issues/5022. Fix https://github.com/facebook/rocksdb/issues/8371 Pull Request resolved: https://github.com/facebook/rocksdb/pull/9020 Reviewed By: pdillinger Differential Revision: D32767084 Pulled By: ajkr fbshipit-source-id: c401ee6e2557230e892d0fe8abb4966cbd18e85f --- HISTORY.md | 1 + db/db_impl/db_impl_write.cc | 2 +- db/memtable.h | 7 ++++++ db/memtable_list.cc | 22 +++++++++---------- db/memtable_list.h | 14 ++++++------ db/memtable_list_test.cc | 2 +- db/write_batch.cc | 4 ++-- .../optimistic_transaction_test.cc | 3 +-- 8 files changed, 31 insertions(+), 24 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index b37408264..a64cf75a4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ * Fixed a bug in rocksdb automatic implicit prefetching which got broken because of new feature adaptive_readahead and internal prefetching got disabled when iterator moves from one file to next. ### Behavior Changes +* MemTableList::TrimHistory now use allocated bytes when max_write_buffer_size_to_maintain > 0(default in TrasactionDB, introduced in PR#5022) Fix #8371. ### Public API change * Extend WriteBatch::AssignTimestamp and AssignTimestamps API so that both functions can accept an optional `checker` argument that performs additional checking on timestamp sizes. diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index a8fdbda96..8e98a758a 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1661,7 +1661,7 @@ Status DBImpl::TrimMemtableHistory(WriteContext* context) { for (auto& cfd : cfds) { autovector to_delete; bool trimmed = cfd->imm()->TrimHistory( - &context->memtables_to_free_, cfd->mem()->ApproximateMemoryUsage()); + &context->memtables_to_free_, cfd->mem()->MemoryAllocatedBytes()); if (trimmed) { context->superversion_context.NewSuperVersion(); assert(context->superversion_context.new_superversion.get() != nullptr); diff --git a/db/memtable.h b/db/memtable.h index d93058066..3a2a97aca 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -146,6 +146,13 @@ class MemTable { return approximate_memory_usage_.load(std::memory_order_relaxed); } + // used by MemTableListVersion::MemoryAllocatedBytesExcludingLast + size_t MemoryAllocatedBytes() const { + return table_->ApproximateMemoryUsage() + + range_del_table_->ApproximateMemoryUsage() + + arena_.MemoryAllocatedBytes(); + } + // Returns a vector of unique random memtable entries of size 'sample_size'. // // Note: the entries are stored in the unordered_set as length-prefixed keys, diff --git a/db/memtable_list.cc b/db/memtable_list.cc index 4762b7f38..8c53d92ca 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -259,8 +259,8 @@ SequenceNumber MemTableListVersion::GetEarliestSequenceNumber( void MemTableListVersion::Add(MemTable* m, autovector* to_delete) { assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable AddMemTable(m); - - TrimHistory(to_delete, m->ApproximateMemoryUsage()); + // m->MemoryAllocatedBytes() is added in MemoryAllocatedBytesExcludingLast + TrimHistory(to_delete, 0); } // Removes m from list of memtables not flushed. Caller should NOT Unref m. @@ -282,16 +282,16 @@ void MemTableListVersion::Remove(MemTable* m, } // return the total memory usage assuming the oldest flushed memtable is dropped -size_t MemTableListVersion::ApproximateMemoryUsageExcludingLast() const { +size_t MemTableListVersion::MemoryAllocatedBytesExcludingLast() const { size_t total_memtable_size = 0; for (auto& memtable : memlist_) { - total_memtable_size += memtable->ApproximateMemoryUsage(); + total_memtable_size += memtable->MemoryAllocatedBytes(); } for (auto& memtable : memlist_history_) { - total_memtable_size += memtable->ApproximateMemoryUsage(); + total_memtable_size += memtable->MemoryAllocatedBytes(); } if (!memlist_history_.empty()) { - total_memtable_size -= memlist_history_.back()->ApproximateMemoryUsage(); + total_memtable_size -= memlist_history_.back()->MemoryAllocatedBytes(); } return total_memtable_size; } @@ -301,7 +301,7 @@ bool MemTableListVersion::MemtableLimitExceeded(size_t usage) { // calculate the total memory usage after dropping the oldest flushed // memtable, compare with max_write_buffer_size_to_maintain_ to decide // whether to trim history - return ApproximateMemoryUsageExcludingLast() + usage >= + return MemoryAllocatedBytesExcludingLast() + usage >= static_cast(max_write_buffer_size_to_maintain_); } else if (max_write_buffer_number_to_maintain_ > 0) { return memlist_.size() + memlist_history_.size() > @@ -596,9 +596,9 @@ size_t MemTableList::ApproximateUnflushedMemTablesMemoryUsage() { size_t MemTableList::ApproximateMemoryUsage() { return current_memory_usage_; } -size_t MemTableList::ApproximateMemoryUsageExcludingLast() const { +size_t MemTableList::MemoryAllocatedBytesExcludingLast() const { const size_t usage = - current_memory_usage_excluding_last_.load(std::memory_order_relaxed); + current_memory_allocted_bytes_excluding_last_.load(std::memory_order_relaxed); return usage; } @@ -609,8 +609,8 @@ bool MemTableList::HasHistory() const { void MemTableList::UpdateCachedValuesFromMemTableListVersion() { const size_t total_memtable_size = - current_->ApproximateMemoryUsageExcludingLast(); - current_memory_usage_excluding_last_.store(total_memtable_size, + current_->MemoryAllocatedBytesExcludingLast(); + current_memory_allocted_bytes_excluding_last_.store(total_memtable_size, std::memory_order_relaxed); const bool has_history = current_->HasHistory(); diff --git a/db/memtable_list.h b/db/memtable_list.h index 88d3e96cc..f34cd8d76 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -168,7 +168,7 @@ class MemTableListVersion { // excluding the last MemTable in memlist_history_. The reason for excluding // the last MemTable is to see if dropping the last MemTable will keep total // memory usage above or equal to max_write_buffer_size_to_maintain_ - size_t ApproximateMemoryUsageExcludingLast() const; + size_t MemoryAllocatedBytesExcludingLast() const; // Whether this version contains flushed memtables that are only kept around // for transaction conflict checking. @@ -221,7 +221,7 @@ class MemTableList { commit_in_progress_(false), flush_requested_(false), current_memory_usage_(0), - current_memory_usage_excluding_last_(0), + current_memory_allocted_bytes_excluding_last_(0), current_has_history_(false) { current_->Ref(); } @@ -281,13 +281,13 @@ class MemTableList { // Returns an estimate of the number of bytes of data in use. size_t ApproximateMemoryUsage(); - // Returns the cached current_memory_usage_excluding_last_ value. - size_t ApproximateMemoryUsageExcludingLast() const; + // Returns the cached current_memory_allocted_bytes_excluding_last_ value. + size_t MemoryAllocatedBytesExcludingLast() const; // Returns the cached current_has_history_ value. bool HasHistory() const; - // Updates current_memory_usage_excluding_last_ and current_has_history_ + // Updates current_memory_allocted_bytes_excluding_last_ and current_has_history_ // from MemTableListVersion. Must be called whenever InstallNewVersion is // called. void UpdateCachedValuesFromMemTableListVersion(); @@ -430,8 +430,8 @@ class MemTableList { // The current memory usage. size_t current_memory_usage_; - // Cached value of current_->ApproximateMemoryUsageExcludingLast(). - std::atomic current_memory_usage_excluding_last_; + // Cached value of current_->MemoryAllocatedBytesExcludingLast(). + std::atomic current_memory_allocted_bytes_excluding_last_; // Cached value of current_->HasHistory(). std::atomic current_has_history_; diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index 18b5f57e9..e801ac04f 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -344,7 +344,7 @@ TEST_F(MemTableListTest, GetFromHistoryTest) { // Create MemTableList int min_write_buffer_number_to_merge = 2; int max_write_buffer_number_to_maintain = 2; - int64_t max_write_buffer_size_to_maintain = 2000; + int64_t max_write_buffer_size_to_maintain = 2 * Arena::kInlineSize; MemTableList list(min_write_buffer_number_to_merge, max_write_buffer_number_to_maintain, max_write_buffer_size_to_maintain); diff --git a/db/write_batch.cc b/db/write_batch.cc index d96dbf111..13ec947b7 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -1986,8 +1986,8 @@ class MemTableInserter : public WriteBatch::Handler { const MemTable* const mem = cfd->mem(); assert(mem); - if (mem->ApproximateMemoryUsageFast() + - imm->ApproximateMemoryUsageExcludingLast() >= + if (mem->MemoryAllocatedBytes() + + imm->MemoryAllocatedBytesExcludingLast() >= size_to_maintain && imm->MarkTrimHistoryNeeded()) { trim_history_scheduler_->ScheduleWork(cfd); diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index 138823b65..26e9c0b1e 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -37,7 +37,7 @@ class OptimisticTransactionTest OptimisticTransactionTest() { options.create_if_missing = true; options.max_write_buffer_number = 2; - options.max_write_buffer_size_to_maintain = 1600; + options.max_write_buffer_size_to_maintain = 2 * Arena::kInlineSize; options.merge_operator.reset(new TestPutOperator()); dbname = test::PerThreadDBPath("optimistic_transaction_testdb"); @@ -318,7 +318,6 @@ TEST_P(OptimisticTransactionTest, CheckKeySkipOldMemtable) { const int kAttemptImmMemTable = 1; for (int attempt = kAttemptHistoryMemtable; attempt <= kAttemptImmMemTable; attempt++) { - options.max_write_buffer_number_to_maintain = 3; Reopen(); WriteOptions write_options;