3c327ac2d0
Summary: Closes https://github.com/facebook/rocksdb/pull/2589 Differential Revision: D5431502 Pulled By: siying fbshipit-source-id: 8ebf8c87883daa9daa54b2303d11ce01ab1f6f75
547 lines
18 KiB
C++
547 lines
18 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include "util/thread_local.h"
|
|
#include "util/mutexlock.h"
|
|
#include "port/likely.h"
|
|
#include <stdlib.h>
|
|
|
|
namespace rocksdb {
|
|
|
|
struct Entry {
|
|
Entry() : ptr(nullptr) {}
|
|
Entry(const Entry& e) : ptr(e.ptr.load(std::memory_order_relaxed)) {}
|
|
std::atomic<void*> ptr;
|
|
};
|
|
|
|
class StaticMeta;
|
|
|
|
// This is the structure that is declared as "thread_local" storage.
|
|
// The vector keep list of atomic pointer for all instances for "current"
|
|
// thread. The vector is indexed by an Id that is unique in process and
|
|
// associated with one ThreadLocalPtr instance. The Id is assigned by a
|
|
// global StaticMeta singleton. So if we instantiated 3 ThreadLocalPtr
|
|
// instances, each thread will have a ThreadData with a vector of size 3:
|
|
// ---------------------------------------------------
|
|
// | | instance 1 | instance 2 | instnace 3 |
|
|
// ---------------------------------------------------
|
|
// | thread 1 | void* | void* | void* | <- ThreadData
|
|
// ---------------------------------------------------
|
|
// | thread 2 | void* | void* | void* | <- ThreadData
|
|
// ---------------------------------------------------
|
|
// | thread 3 | void* | void* | void* | <- ThreadData
|
|
// ---------------------------------------------------
|
|
struct ThreadData {
|
|
explicit ThreadData(ThreadLocalPtr::StaticMeta* _inst) : entries(), inst(_inst) {}
|
|
std::vector<Entry> entries;
|
|
ThreadData* next;
|
|
ThreadData* prev;
|
|
ThreadLocalPtr::StaticMeta* inst;
|
|
};
|
|
|
|
class ThreadLocalPtr::StaticMeta {
|
|
public:
|
|
StaticMeta();
|
|
|
|
// Return the next available Id
|
|
uint32_t GetId();
|
|
// Return the next available Id without claiming it
|
|
uint32_t PeekId() const;
|
|
// Return the given Id back to the free pool. This also triggers
|
|
// UnrefHandler for associated pointer value (if not NULL) for all threads.
|
|
void ReclaimId(uint32_t id);
|
|
|
|
// Return the pointer value for the given id for the current thread.
|
|
void* Get(uint32_t id) const;
|
|
// Reset the pointer value for the given id for the current thread.
|
|
void Reset(uint32_t id, void* ptr);
|
|
// Atomically swap the supplied ptr and return the previous value
|
|
void* Swap(uint32_t id, void* ptr);
|
|
// Atomically compare and swap the provided value only if it equals
|
|
// to expected value.
|
|
bool CompareAndSwap(uint32_t id, void* ptr, void*& expected);
|
|
// Reset all thread local data to replacement, and return non-nullptr
|
|
// data for all existing threads
|
|
void Scrape(uint32_t id, autovector<void*>* ptrs, void* const replacement);
|
|
// Update res by applying func on each thread-local value. Holds a lock that
|
|
// prevents unref handler from running during this call, but clients must
|
|
// still provide external synchronization since the owning thread can
|
|
// access the values without internal locking, e.g., via Get() and Reset().
|
|
void Fold(uint32_t id, FoldFunc func, void* res);
|
|
|
|
// Register the UnrefHandler for id
|
|
void SetHandler(uint32_t id, UnrefHandler handler);
|
|
|
|
// protect inst, next_instance_id_, free_instance_ids_, head_,
|
|
// ThreadData.entries
|
|
//
|
|
// Note that here we prefer function static variable instead of the usual
|
|
// global static variable. The reason is that c++ destruction order of
|
|
// static variables in the reverse order of their construction order.
|
|
// However, C++ does not guarantee any construction order when global
|
|
// static variables are defined in different files, while the function
|
|
// static variables are initialized when their function are first called.
|
|
// As a result, the construction order of the function static variables
|
|
// can be controlled by properly invoke their first function calls in
|
|
// the right order.
|
|
//
|
|
// For instance, the following function contains a function static
|
|
// variable. We place a dummy function call of this inside
|
|
// Env::Default() to ensure the construction order of the construction
|
|
// order.
|
|
static port::Mutex* Mutex();
|
|
|
|
// Returns the member mutex of the current StaticMeta. In general,
|
|
// Mutex() should be used instead of this one. However, in case where
|
|
// the static variable inside Instance() goes out of scope, MemberMutex()
|
|
// should be used. One example is OnThreadExit() function.
|
|
port::Mutex* MemberMutex() { return &mutex_; }
|
|
|
|
private:
|
|
// Get UnrefHandler for id with acquiring mutex
|
|
// REQUIRES: mutex locked
|
|
UnrefHandler GetHandler(uint32_t id);
|
|
|
|
// Triggered before a thread terminates
|
|
static void OnThreadExit(void* ptr);
|
|
|
|
// Add current thread's ThreadData to the global chain
|
|
// REQUIRES: mutex locked
|
|
void AddThreadData(ThreadData* d);
|
|
|
|
// Remove current thread's ThreadData from the global chain
|
|
// REQUIRES: mutex locked
|
|
void RemoveThreadData(ThreadData* d);
|
|
|
|
static ThreadData* GetThreadLocal();
|
|
|
|
uint32_t next_instance_id_;
|
|
// Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed
|
|
// frequently. This also prevents it from blowing up the vector space.
|
|
autovector<uint32_t> free_instance_ids_;
|
|
// Chain all thread local structure together. This is necessary since
|
|
// when one ThreadLocalPtr gets destroyed, we need to loop over each
|
|
// thread's version of pointer corresponding to that instance and
|
|
// call UnrefHandler for it.
|
|
ThreadData head_;
|
|
|
|
std::unordered_map<uint32_t, UnrefHandler> handler_map_;
|
|
|
|
// The private mutex. Developers should always use Mutex() instead of
|
|
// using this variable directly.
|
|
port::Mutex mutex_;
|
|
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
|
|
// Thread local storage
|
|
static __thread ThreadData* tls_;
|
|
#endif
|
|
|
|
// Used to make thread exit trigger possible if !defined(OS_MACOSX).
|
|
// Otherwise, used to retrieve thread data.
|
|
pthread_key_t pthread_key_;
|
|
};
|
|
|
|
|
|
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
|
|
__thread ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr;
|
|
#endif
|
|
|
|
// Windows doesn't support a per-thread destructor with its
|
|
// TLS primitives. So, we build it manually by inserting a
|
|
// function to be called on each thread's exit.
|
|
// See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
|
|
// and http://www.nynaeve.net/?p=183
|
|
//
|
|
// really we do this to have clear conscience since using TLS with thread-pools
|
|
// is iffy
|
|
// although OK within a request. But otherwise, threads have no identity in its
|
|
// modern use.
|
|
|
|
// This runs on windows only called from the System Loader
|
|
#ifdef OS_WIN
|
|
|
|
// Windows cleanup routine is invoked from a System Loader with a different
|
|
// signature so we can not directly hookup the original OnThreadExit which is
|
|
// private member
|
|
// so we make StaticMeta class share with the us the address of the function so
|
|
// we can invoke it.
|
|
namespace wintlscleanup {
|
|
|
|
// This is set to OnThreadExit in StaticMeta singleton constructor
|
|
UnrefHandler thread_local_inclass_routine = nullptr;
|
|
pthread_key_t thread_local_key = -1;
|
|
|
|
// Static callback function to call with each thread termination.
|
|
void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
|
|
// We decided to punt on PROCESS_EXIT
|
|
if (DLL_THREAD_DETACH == reason) {
|
|
if (thread_local_key != pthread_key_t(-1) && thread_local_inclass_routine != nullptr) {
|
|
void* tls = pthread_getspecific(thread_local_key);
|
|
if (tls != nullptr) {
|
|
thread_local_inclass_routine(tls);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
} // wintlscleanup
|
|
|
|
// extern "C" suppresses C++ name mangling so we know the symbol name for the
|
|
// linker /INCLUDE:symbol pragma above.
|
|
extern "C" {
|
|
|
|
#ifdef _MSC_VER
|
|
// The linker must not discard thread_callback_on_exit. (We force a reference
|
|
// to this variable with a linker /include:symbol pragma to ensure that.) If
|
|
// this variable is discarded, the OnThreadExit function will never be called.
|
|
#ifdef _WIN64
|
|
|
|
// .CRT section is merged with .rdata on x64 so it must be constant data.
|
|
#pragma const_seg(".CRT$XLB")
|
|
// When defining a const variable, it must have external linkage to be sure the
|
|
// linker doesn't discard it.
|
|
extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit;
|
|
const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit =
|
|
wintlscleanup::WinOnThreadExit;
|
|
// Reset the default section.
|
|
#pragma const_seg()
|
|
|
|
#pragma comment(linker, "/include:_tls_used")
|
|
#pragma comment(linker, "/include:p_thread_callback_on_exit")
|
|
|
|
#else // _WIN64
|
|
|
|
#pragma data_seg(".CRT$XLB")
|
|
PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
|
|
// Reset the default section.
|
|
#pragma data_seg()
|
|
|
|
#pragma comment(linker, "/INCLUDE:__tls_used")
|
|
#pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")
|
|
|
|
#endif // _WIN64
|
|
|
|
#else
|
|
// https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc
|
|
BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) {
|
|
if (dwReason == DLL_THREAD_DETACH)
|
|
wintlscleanup::WinOnThreadExit(h, dwReason, pv);
|
|
return TRUE;
|
|
}
|
|
#endif
|
|
} // extern "C"
|
|
|
|
#endif // OS_WIN
|
|
|
|
void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); }
|
|
|
|
ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
|
|
// Here we prefer function static variable instead of global
|
|
// static variable as function static variable is initialized
|
|
// when the function is first call. As a result, we can properly
|
|
// control their construction order by properly preparing their
|
|
// first function call.
|
|
//
|
|
// Note that here we decide to make "inst" a static pointer w/o deleting
|
|
// it at the end instead of a static variable. This is to avoid the following
|
|
// destruction order disaster happens when a child thread using ThreadLocalPtr
|
|
// dies AFTER the main thread dies: When a child thread happens to use
|
|
// ThreadLocalPtr, it will try to delete its thread-local data on its
|
|
// OnThreadExit when the child thread dies. However, OnThreadExit depends
|
|
// on the following variable. As a result, if the main thread dies before any
|
|
// child thread happen to use ThreadLocalPtr dies, then the destruction of
|
|
// the following variable will go first, then OnThreadExit, therefore causing
|
|
// invalid access.
|
|
//
|
|
// The above problem can be solved by using thread_local to store tls_ instead
|
|
// of using __thread. The major difference between thread_local and __thread
|
|
// is that thread_local supports dynamic construction and destruction of
|
|
// non-primitive typed variables. As a result, we can guarantee the
|
|
// destruction order even when the main thread dies before any child threads.
|
|
// However, thread_local is not supported in all compilers that accept -std=c++11
|
|
// (e.g., eg Mac with XCode < 8. XCode 8+ supports thread_local).
|
|
static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta();
|
|
return inst;
|
|
}
|
|
|
|
port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; }
|
|
|
|
void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
|
|
auto* tls = static_cast<ThreadData*>(ptr);
|
|
assert(tls != nullptr);
|
|
|
|
// Use the cached StaticMeta::Instance() instead of directly calling
|
|
// the variable inside StaticMeta::Instance() might already go out of
|
|
// scope here in case this OnThreadExit is called after the main thread
|
|
// dies.
|
|
auto* inst = tls->inst;
|
|
pthread_setspecific(inst->pthread_key_, nullptr);
|
|
|
|
MutexLock l(inst->MemberMutex());
|
|
inst->RemoveThreadData(tls);
|
|
// Unref stored pointers of current thread from all instances
|
|
uint32_t id = 0;
|
|
for (auto& e : tls->entries) {
|
|
void* raw = e.ptr.load();
|
|
if (raw != nullptr) {
|
|
auto unref = inst->GetHandler(id);
|
|
if (unref != nullptr) {
|
|
unref(raw);
|
|
}
|
|
}
|
|
++id;
|
|
}
|
|
// Delete thread local structure no matter if it is Mac platform
|
|
delete tls;
|
|
}
|
|
|
|
ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0), head_(this) {
|
|
if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) {
|
|
abort();
|
|
}
|
|
|
|
// OnThreadExit is not getting called on the main thread.
|
|
// Call through the static destructor mechanism to avoid memory leak.
|
|
//
|
|
// Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
|
|
// singleton (destructors are invoked in reverse order of constructor
|
|
// _completion_); the latter must not mutate internal members. This
|
|
// cleanup mechanism inherently relies on use-after-release of the
|
|
// StaticMeta, and is brittle with respect to compiler-specific handling
|
|
// of memory backing destructed statically-scoped objects. Perhaps
|
|
// registering with atexit(3) would be more robust.
|
|
//
|
|
// This is not required on Windows.
|
|
#if !defined(OS_WIN)
|
|
static struct A {
|
|
~A() {
|
|
#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
|
|
ThreadData* tls_ =
|
|
static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
|
|
#endif
|
|
if (tls_) {
|
|
OnThreadExit(tls_);
|
|
}
|
|
}
|
|
} a;
|
|
#endif // !defined(OS_WIN)
|
|
|
|
head_.next = &head_;
|
|
head_.prev = &head_;
|
|
|
|
#ifdef OS_WIN
|
|
// Share with Windows its cleanup routine and the key
|
|
wintlscleanup::thread_local_inclass_routine = OnThreadExit;
|
|
wintlscleanup::thread_local_key = pthread_key_;
|
|
#endif
|
|
}
|
|
|
|
void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadData* d) {
|
|
Mutex()->AssertHeld();
|
|
d->next = &head_;
|
|
d->prev = head_.prev;
|
|
head_.prev->next = d;
|
|
head_.prev = d;
|
|
}
|
|
|
|
void ThreadLocalPtr::StaticMeta::RemoveThreadData(
|
|
ThreadData* d) {
|
|
Mutex()->AssertHeld();
|
|
d->next->prev = d->prev;
|
|
d->prev->next = d->next;
|
|
d->next = d->prev = d;
|
|
}
|
|
|
|
ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
|
|
#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
|
|
// Make this local variable name look like a member variable so that we
|
|
// can share all the code below
|
|
ThreadData* tls_ =
|
|
static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
|
|
#endif
|
|
|
|
if (UNLIKELY(tls_ == nullptr)) {
|
|
auto* inst = Instance();
|
|
tls_ = new ThreadData(inst);
|
|
{
|
|
// Register it in the global chain, needs to be done before thread exit
|
|
// handler registration
|
|
MutexLock l(Mutex());
|
|
inst->AddThreadData(tls_);
|
|
}
|
|
// Even it is not OS_MACOSX, need to register value for pthread_key_ so that
|
|
// its exit handler will be triggered.
|
|
if (pthread_setspecific(inst->pthread_key_, tls_) != 0) {
|
|
{
|
|
MutexLock l(Mutex());
|
|
inst->RemoveThreadData(tls_);
|
|
}
|
|
delete tls_;
|
|
abort();
|
|
}
|
|
}
|
|
return tls_;
|
|
}
|
|
|
|
void* ThreadLocalPtr::StaticMeta::Get(uint32_t id) const {
|
|
auto* tls = GetThreadLocal();
|
|
if (UNLIKELY(id >= tls->entries.size())) {
|
|
return nullptr;
|
|
}
|
|
return tls->entries[id].ptr.load(std::memory_order_acquire);
|
|
}
|
|
|
|
void ThreadLocalPtr::StaticMeta::Reset(uint32_t id, void* ptr) {
|
|
auto* tls = GetThreadLocal();
|
|
if (UNLIKELY(id >= tls->entries.size())) {
|
|
// Need mutex to protect entries access within ReclaimId
|
|
MutexLock l(Mutex());
|
|
tls->entries.resize(id + 1);
|
|
}
|
|
tls->entries[id].ptr.store(ptr, std::memory_order_release);
|
|
}
|
|
|
|
void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id, void* ptr) {
|
|
auto* tls = GetThreadLocal();
|
|
if (UNLIKELY(id >= tls->entries.size())) {
|
|
// Need mutex to protect entries access within ReclaimId
|
|
MutexLock l(Mutex());
|
|
tls->entries.resize(id + 1);
|
|
}
|
|
return tls->entries[id].ptr.exchange(ptr, std::memory_order_acquire);
|
|
}
|
|
|
|
bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id, void* ptr,
|
|
void*& expected) {
|
|
auto* tls = GetThreadLocal();
|
|
if (UNLIKELY(id >= tls->entries.size())) {
|
|
// Need mutex to protect entries access within ReclaimId
|
|
MutexLock l(Mutex());
|
|
tls->entries.resize(id + 1);
|
|
}
|
|
return tls->entries[id].ptr.compare_exchange_strong(
|
|
expected, ptr, std::memory_order_release, std::memory_order_relaxed);
|
|
}
|
|
|
|
void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id, autovector<void*>* ptrs,
|
|
void* const replacement) {
|
|
MutexLock l(Mutex());
|
|
for (ThreadData* t = head_.next; t != &head_; t = t->next) {
|
|
if (id < t->entries.size()) {
|
|
void* ptr =
|
|
t->entries[id].ptr.exchange(replacement, std::memory_order_acquire);
|
|
if (ptr != nullptr) {
|
|
ptrs->push_back(ptr);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void ThreadLocalPtr::StaticMeta::Fold(uint32_t id, FoldFunc func, void* res) {
|
|
MutexLock l(Mutex());
|
|
for (ThreadData* t = head_.next; t != &head_; t = t->next) {
|
|
if (id < t->entries.size()) {
|
|
void* ptr = t->entries[id].ptr.load();
|
|
if (ptr != nullptr) {
|
|
func(ptr, res);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
uint32_t ThreadLocalPtr::TEST_PeekId() {
|
|
return Instance()->PeekId();
|
|
}
|
|
|
|
void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id, UnrefHandler handler) {
|
|
MutexLock l(Mutex());
|
|
handler_map_[id] = handler;
|
|
}
|
|
|
|
UnrefHandler ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id) {
|
|
Mutex()->AssertHeld();
|
|
auto iter = handler_map_.find(id);
|
|
if (iter == handler_map_.end()) {
|
|
return nullptr;
|
|
}
|
|
return iter->second;
|
|
}
|
|
|
|
uint32_t ThreadLocalPtr::StaticMeta::GetId() {
|
|
MutexLock l(Mutex());
|
|
if (free_instance_ids_.empty()) {
|
|
return next_instance_id_++;
|
|
}
|
|
|
|
uint32_t id = free_instance_ids_.back();
|
|
free_instance_ids_.pop_back();
|
|
return id;
|
|
}
|
|
|
|
uint32_t ThreadLocalPtr::StaticMeta::PeekId() const {
|
|
MutexLock l(Mutex());
|
|
if (!free_instance_ids_.empty()) {
|
|
return free_instance_ids_.back();
|
|
}
|
|
return next_instance_id_;
|
|
}
|
|
|
|
void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) {
|
|
// This id is not used, go through all thread local data and release
|
|
// corresponding value
|
|
MutexLock l(Mutex());
|
|
auto unref = GetHandler(id);
|
|
for (ThreadData* t = head_.next; t != &head_; t = t->next) {
|
|
if (id < t->entries.size()) {
|
|
void* ptr = t->entries[id].ptr.exchange(nullptr);
|
|
if (ptr != nullptr && unref != nullptr) {
|
|
unref(ptr);
|
|
}
|
|
}
|
|
}
|
|
handler_map_[id] = nullptr;
|
|
free_instance_ids_.push_back(id);
|
|
}
|
|
|
|
ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler)
|
|
: id_(Instance()->GetId()) {
|
|
if (handler != nullptr) {
|
|
Instance()->SetHandler(id_, handler);
|
|
}
|
|
}
|
|
|
|
ThreadLocalPtr::~ThreadLocalPtr() {
|
|
Instance()->ReclaimId(id_);
|
|
}
|
|
|
|
void* ThreadLocalPtr::Get() const {
|
|
return Instance()->Get(id_);
|
|
}
|
|
|
|
void ThreadLocalPtr::Reset(void* ptr) {
|
|
Instance()->Reset(id_, ptr);
|
|
}
|
|
|
|
void* ThreadLocalPtr::Swap(void* ptr) {
|
|
return Instance()->Swap(id_, ptr);
|
|
}
|
|
|
|
bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) {
|
|
return Instance()->CompareAndSwap(id_, ptr, expected);
|
|
}
|
|
|
|
void ThreadLocalPtr::Scrape(autovector<void*>* ptrs, void* const replacement) {
|
|
Instance()->Scrape(id_, ptrs, replacement);
|
|
}
|
|
|
|
void ThreadLocalPtr::Fold(FoldFunc func, void* res) {
|
|
Instance()->Fold(id_, func, res);
|
|
}
|
|
|
|
} // namespace rocksdb
|