Fix a tsan warning due to reading flags in LRUHandle without holding a mutex (#8433)

Summary:
Tsan complains due to a perceived race condition in accessing LRUHandle flags. One thread calls ```LRUHandle::SetHit()``` from ```LRUCacheShard::Lookup()```, while another thread calls ```LRUHandle::IsPending()``` from ```LRUCacheShard::IsReady()```. The latter call is from ```MultiGet```. It doesn't actually have to call ```IsReady``` since a null value indicates the cache handle is not ready, so its sufficient to check for a null value.

Also modify ```IsReady``` to acquire the LRU shard mutex.

Tests:
1. make check
2. Run tsan_crash

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8433

Reviewed By: zhichao-cao

Differential Revision: D29278030

Pulled By: anand1976

fbshipit-source-id: 0c9fed56d12eda853e72dadebe75038361bd257f
This commit is contained in:
anand76 2021-06-21 21:22:57 -07:00 committed by Facebook GitHub Bot
parent e9b627fd56
commit a50da404be
2 changed files with 11 additions and 17 deletions

7
cache/lru_cache.cc vendored
View File

@ -596,15 +596,12 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
bool LRUCacheShard::IsReady(Cache::Handle* handle) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
MutexLock l(&mutex_);
bool ready = true;
if (e->IsPending()) {
assert(secondary_cache_);
assert(e->sec_handle);
if (e->sec_handle->IsReady()) {
Promote(e);
} else {
ready = false;
}
ready = e->sec_handle->IsReady();
}
return ready;
}

View File

@ -2564,24 +2564,21 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
s = Status::OK();
}
if (s.ok() && !results.back().IsEmpty()) {
if (results.back().IsReady()) {
// Since we have a valid handle, check the value. If its nullptr,
// it means the cache is waiting for the final result and we're
// supposed to call WaitAll() to wait for the result.
if (results.back().GetValue() != nullptr) {
// Found it in the cache. Add NULL handle to indicate there is
// nothing to read from disk.
if (results.back().GetCacheHandle()) {
results.back().UpdateCachedValue();
// Its possible the cache lookup returned a non-null handle,
// but the lookup actually failed to produce a valid value
if (results.back().GetValue() == nullptr) {
block_handles.emplace_back(handle);
total_len += block_size(handle);
}
}
if (results.back().GetValue() != nullptr) {
block_handles.emplace_back(BlockHandle::NullBlockHandle());
}
} else {
// We have to wait for the asynchronous cache lookup to finish,
// and then we may have to read the block from disk anyway
// We have to wait for the cache lookup to finish in the
// background, and then we may have to read the block from disk
// anyway
assert(results.back().GetCacheHandle());
wait_for_cache_results = true;
block_handles.emplace_back(handle);
cache_handles.emplace_back(results.back().GetCacheHandle());