From 8ed680bdb05ece706bb4c6e601bf3fa176345442 Mon Sep 17 00:00:00 2001 From: Akanksha Mahajan Date: Fri, 8 Jan 2021 13:24:11 -0800 Subject: [PATCH] Add new API to report dummy entries size in cache in WriteBufferManager (#7837) Summary: Add new API WriteBufferManager::dummy_entries_in_cache_usage() which reports the dummy entries size stored in cache to account for DataBlocks in WriteBufferManager. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7837 Test Plan: Updated test ./write_buffer_manager_test Reviewed By: ajkr Differential Revision: D25794312 Pulled By: akankshamahajan15 fbshipit-source-id: 197f5e8701e3dc57a7df72dab1735624f90daf4b --- HISTORY.md | 3 +++ include/rocksdb/write_buffer_manager.h | 4 ++++ memtable/write_buffer_manager.cc | 3 +++ memtable/write_buffer_manager_test.cc | 25 ++++++++++++++++++++++--- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 40a0141a7..3154335ba 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,9 @@ * When verifying full file checksum with `DB::VerifyFileChecksums()`, we now fail with `Status::InvalidArgument` if the name of the checksum generator used for verification does not match the name of the checksum generator used for protecting the file when it was created. * Since RocksDB does not continue write the same file if a file write fails for any reason, the file scope write IO error is treated the same as retryable IO error. More information about error handling of file scope IO error is included in `ErrorHandler::SetBGError`. +### Public API Change +* Add a public API WriteBufferManager::dummy_entries_in_cache_usage() which reports the size of dummy entries stored in cache (passed to WriteBufferManager). Dummy entries are used to account for DataBlocks. + ## 6.16.0 (12/18/2020) ### Behavior Changes * Attempting to write a merge operand without explicitly configuring `merge_operator` now fails immediately, causing the DB to enter read-only mode. 
Previously, failure was deferred until the `merge_operator` was needed by a user read or a background operation. diff --git a/include/rocksdb/write_buffer_manager.h b/include/rocksdb/write_buffer_manager.h index ae1c98caf..c1d1300ae 100644 --- a/include/rocksdb/write_buffer_manager.h +++ b/include/rocksdb/write_buffer_manager.h @@ -43,6 +43,9 @@ class WriteBufferManager { size_t mutable_memtable_memory_usage() const { return memory_active_.load(std::memory_order_relaxed); } + size_t dummy_entries_in_cache_usage() const { + return dummy_size_.load(std::memory_order_relaxed); + } size_t buffer_size() const { return buffer_size_; } // Should only be called from write thread @@ -93,6 +96,7 @@ class WriteBufferManager { std::atomic<size_t> memory_used_; // Memory that hasn't been scheduled to free. std::atomic<size_t> memory_active_; + std::atomic<size_t> dummy_size_; struct CacheRep; std::unique_ptr<CacheRep> cache_rep_; diff --git a/memtable/write_buffer_manager.cc b/memtable/write_buffer_manager.cc index 9b7470870..f6451032a 100644 --- a/memtable/write_buffer_manager.cc +++ b/memtable/write_buffer_manager.cc @@ -54,6 +54,7 @@ WriteBufferManager::WriteBufferManager(size_t _buffer_size, mutable_limit_(buffer_size_ * 7 / 8), memory_used_(0), memory_active_(0), + dummy_size_(0), cache_rep_(nullptr) { #ifndef ROCKSDB_LITE if (cache) { @@ -104,6 +105,7 @@ void WriteBufferManager::ReserveMemWithCache(size_t mem) { // it in the future. 
cache_rep_->dummy_handles_.push_back(handle); cache_rep_->cache_allocated_size_ += kSizeDummyEntry; + dummy_size_.fetch_add(kSizeDummyEntry, std::memory_order_relaxed); } #else (void)mem; @@ -137,6 +139,7 @@ void WriteBufferManager::FreeMemWithCache(size_t mem) { } cache_rep_->dummy_handles_.pop_back(); cache_rep_->cache_allocated_size_ -= kSizeDummyEntry; + dummy_size_.fetch_sub(kSizeDummyEntry, std::memory_order_relaxed); } #else (void)mem; diff --git a/memtable/write_buffer_manager_test.cc b/memtable/write_buffer_manager_test.cc index 0cdd7c478..e9377a2a7 100644 --- a/memtable/write_buffer_manager_test.cc +++ b/memtable/write_buffer_manager_test.cc @@ -11,7 +11,7 @@ #include "test_util/testharness.h" namespace ROCKSDB_NAMESPACE { - +const size_t kSizeDummyEntry = 256 * 1024; class WriteBufferManagerTest : public testing::Test {}; #ifndef ROCKSDB_LITE @@ -65,28 +65,35 @@ TEST_F(WriteBufferManagerTest, CacheCost) { wbf->ReserveMem(333 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 2 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 2 * 256 * 1024 + 10000); + // 2 dummy entries are added for size 333 kb. + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 2 * kSizeDummyEntry); // Allocate another 512KB wbf->ReserveMem(512 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 4 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + 10000); + // 2 more dummy entries are added for size 512. + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 4 * kSizeDummyEntry); // Allocate another 10MB wbf->ReserveMem(10 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000); + // 40 more entries are added for size 10 * 1024 * 1024. 
+ ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry); // Free 1MB will not cause any change in cache cost wbf->FreeMem(1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000); - + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry); ASSERT_FALSE(wbf->ShouldFlush()); // Allocate another 41MB wbf->ReserveMem(41 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 204 * kSizeDummyEntry); ASSERT_TRUE(wbf->ShouldFlush()); ASSERT_TRUE(wbf->ShouldFlush()); @@ -94,7 +101,7 @@ TEST_F(WriteBufferManagerTest, CacheCost) { wbf->ScheduleFreeMem(20 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000); - + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 204 * kSizeDummyEntry); // Still need flush as the hard limit hits ASSERT_TRUE(wbf->ShouldFlush()); @@ -102,6 +109,7 @@ TEST_F(WriteBufferManagerTest, CacheCost) { wbf->FreeMem(20 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 203 * kSizeDummyEntry); ASSERT_FALSE(wbf->ShouldFlush()); @@ -109,19 +117,23 @@ TEST_F(WriteBufferManagerTest, CacheCost) { wbf->FreeMem(16 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 202 * kSizeDummyEntry); wbf->FreeMem(16 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 201 * kSizeDummyEntry); // Reserve 512KB will not cause 
any change in cache cost wbf->ReserveMem(512 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 201 * kSizeDummyEntry); wbf->FreeMem(16 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 200 * kSizeDummyEntry); // Destory write buffer manger should free everything wbf.reset(); @@ -137,6 +149,7 @@ TEST_F(WriteBufferManagerTest, NoCapCacheCost) { wbf->ReserveMem(10 * 1024 * 1024); ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 10 * 1024 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 40 * kSizeDummyEntry); ASSERT_FALSE(wbf->ShouldFlush()); wbf->FreeMem(9 * 1024 * 1024); @@ -145,6 +158,7 @@ TEST_F(WriteBufferManagerTest, NoCapCacheCost) { } ASSERT_GE(cache->GetPinnedUsage(), 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024 + 10000); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 4 * kSizeDummyEntry); } TEST_F(WriteBufferManagerTest, CacheFull) { @@ -156,16 +170,20 @@ TEST_F(WriteBufferManagerTest, CacheFull) { std::shared_ptr<Cache> cache = NewLRUCache(lo); std::unique_ptr<WriteBufferManager> wbf(new WriteBufferManager(0, cache)); wbf->ReserveMem(10 * 1024 * 1024); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 40 * kSizeDummyEntry); size_t prev_pinned = cache->GetPinnedUsage(); ASSERT_GE(prev_pinned, 10 * 1024 * 1024); + // Some insert will fail wbf->ReserveMem(10 * 1024 * 1024); ASSERT_LE(cache->GetPinnedUsage(), 12 * 1024 * 1024); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 80 * kSizeDummyEntry); // Increase capacity so next insert will succeed cache->SetCapacity(30 * 1024 * 1024); wbf->ReserveMem(10 * 1024 * 1024); ASSERT_GT(cache->GetPinnedUsage(), 20 * 1024 * 1024); + 
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 120 * kSizeDummyEntry); // Gradually release 20 MB for (int i = 0; i < 40; i++) { @@ -173,6 +191,7 @@ TEST_F(WriteBufferManagerTest, CacheFull) { } ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024); ASSERT_LT(cache->GetPinnedUsage(), 20 * 1024 * 1024); + ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 95 * kSizeDummyEntry); } #endif // ROCKSDB_LITE