From a50da404bebeae2006c866c4b914fba106c0c4d6 Mon Sep 17 00:00:00 2001 From: anand76 Date: Mon, 21 Jun 2021 21:22:57 -0700 Subject: [PATCH] Fix a tsan warning due to reading flags in LRUHandle without holding a mutex (#8433) Summary: Tsan complains due to a perceived race condition in accessing LRUHandle flags. One thread calls ```LRUHandle::SetHit()``` from ```LRUCacheShard::Lookup()```, while another thread calls ```LRUHandle::IsPending()``` from ```LRUCacheShard::IsReady()```. The latter call is from ```MultiGet```. It doesn't actually have to call ```IsReady``` since a null value indicates the cache handle is not ready, so its sufficient to check for a null value. Also modify ```IsReady``` to acquire the LRU shard mutex. Tests: 1. make check 2. Run tsan_crash Pull Request resolved: https://github.com/facebook/rocksdb/pull/8433 Reviewed By: zhichao-cao Differential Revision: D29278030 Pulled By: anand1976 fbshipit-source-id: 0c9fed56d12eda853e72dadebe75038361bd257f --- cache/lru_cache.cc | 7 ++----- table/block_based/block_based_table_reader.cc | 21 ++++++++----------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index a7310d8dc..f7da46b69 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -596,15 +596,12 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) { bool LRUCacheShard::IsReady(Cache::Handle* handle) { LRUHandle* e = reinterpret_cast(handle); + MutexLock l(&mutex_); bool ready = true; if (e->IsPending()) { assert(secondary_cache_); assert(e->sec_handle); - if (e->sec_handle->IsReady()) { - Promote(e); - } else { - ready = false; - } + ready = e->sec_handle->IsReady(); } return ready; } diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 7392a0d0b..acb58138d 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -2564,24 +2564,21 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options, s = Status::OK(); } if (s.ok() && !results.back().IsEmpty()) { - if (results.back().IsReady()) { + // Since we have a valid handle, check the value. If its nullptr, + // it means the cache is waiting for the final result and we're + // supposed to call WaitAll() to wait for the result. + if (results.back().GetValue() != nullptr) { // Found it in the cache. Add NULL handle to indicate there is // nothing to read from disk. if (results.back().GetCacheHandle()) { results.back().UpdateCachedValue(); - // Its possible the cache lookup returned a non-null handle, - // but the lookup actually failed to produce a valid value - if (results.back().GetValue() == nullptr) { - block_handles.emplace_back(handle); - total_len += block_size(handle); - } - } - if (results.back().GetValue() != nullptr) { - block_handles.emplace_back(BlockHandle::NullBlockHandle()); } + block_handles.emplace_back(BlockHandle::NullBlockHandle()); } else { - // We have to wait for the asynchronous cache lookup to finish, - // and then we may have to read the block from disk anyway + // We have to wait for the cache lookup to finish in the + // background, and then we may have to read the block from disk + // anyway + assert(results.back().GetCacheHandle()); wait_for_cache_results = true; block_handles.emplace_back(handle); cache_handles.emplace_back(results.back().GetCacheHandle());