rocksdb/util/thread_local.cc
Yueh-Hsuan Chiang 4860bfdeab Fixed a dependency issue of ThreadLocalPtr
Summary:
When a child thread that uses ThreadLocalPtr, ThreadLocalPtr::OnThreadExit
will be called when that child thread is destroyed.  However,
OnThreadExit will try to access a static singleton of ThreadLocalPtr,
which will be destroyed when the main thread exit.  As a result,
when a child thread that uses ThreadLocalPtr exits AFTER the main thread
exits, illegal memory access will occur.

This diff includes a test that reproduce this legacy bug.

    ==2095206==ERROR: AddressSanitizer: heap-use-after-free on address
    0x608000007fa0 at pc 0x959b79 bp 0x7f5fa7426b60 sp 0x7f5fa7426b58
    READ of size 8 at 0x608000007fa0 thread T1

This patch fix this issue by having the thread local mutex never be deleted
(but will leak small piece of memory at the end.)   The patch also describe
a better solution (thread_local) in the comment that requires gcc 4.8.1 and
in latest clang as a future work once we agree to move toward gcc 4.8.

Test Plan:
COMPILE_WITH_ASAN=1 make thread_local_test -j32
./thread_local_test --gtest_filter="*MainThreadDiesFirst"

Reviewers: anthony, hermanlee4, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D53013
2016-02-10 16:56:21 -08:00

396 lines
12 KiB
C++

// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "util/thread_local.h"
#include "util/mutexlock.h"
#include "port/likely.h"
#include <stdlib.h>
namespace rocksdb {
#if ROCKSDB_SUPPORT_THREAD_LOCAL
__thread ThreadLocalPtr::ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr;
#endif
// Windows doesn't support a per-thread destructor with its
// TLS primitives. So, we build it manually by inserting a
// function to be called on each thread's exit.
// See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
// and http://www.nynaeve.net/?p=183
//
// really we do this to have clear conscience since using TLS with thread-pools
// is iffy
// although OK within a request. But otherwise, threads have no identity in its
// modern use.
// This runs on windows only called from the System Loader
#ifdef OS_WIN
// Windows cleanup routine is invoked from a System Loader with a different
// signature so we can not directly hookup the original OnThreadExit which is
// private member
// so we make StaticMeta class share with the us the address of the function so
// we can invoke it.
namespace wintlscleanup {
// This is set to OnThreadExit in StaticMeta singleton constructor
UnrefHandler thread_local_inclass_routine = nullptr;
pthread_key_t thread_local_key = -1;
// Static callback function to call with each thread termination.
void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
// We decided to punt on PROCESS_EXIT
if (DLL_THREAD_DETACH == reason) {
if (thread_local_key != -1 && thread_local_inclass_routine != nullptr) {
void* tls = pthread_getspecific(thread_local_key);
if (tls != nullptr) {
thread_local_inclass_routine(tls);
}
}
}
}
} // wintlscleanup
#ifdef _WIN64
#pragma comment(linker, "/include:_tls_used")
#pragma comment(linker, "/include:p_thread_callback_on_exit")
#else // _WIN64
#pragma comment(linker, "/INCLUDE:__tls_used")
#pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")
#endif // _WIN64
// extern "C" suppresses C++ name mangling so we know the symbol name for the
// linker /INCLUDE:symbol pragma above.
extern "C" {
// The linker must not discard thread_callback_on_exit. (We force a reference
// to this variable with a linker /include:symbol pragma to ensure that.) If
// this variable is discarded, the OnThreadExit function will never be called.
#ifdef _WIN64
// .CRT section is merged with .rdata on x64 so it must be constant data.
#pragma const_seg(".CRT$XLB")
// When defining a const variable, it must have external linkage to be sure the
// linker doesn't discard it.
extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit;
const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit =
wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma const_seg()
#else // _WIN64
#pragma data_seg(".CRT$XLB")
PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma data_seg()
#endif // _WIN64
} // extern "C"
#endif // OS_WIN
void ThreadLocalPtr::InitSingletons() {
ThreadLocalPtr::StaticMeta::InitSingletons();
}
ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
// Here we prefer function static variable instead of global
// static variable as function static variable is initialized
// when the function is first call. As a result, we can properly
// control their construction order by properly preparing their
// first function call.
//
// Note that here we decide to make "inst" a static pointer w/o deleting
// it at the end instead of a static variable. This is to avoid the following
// destruction order desester happens when a child thread using ThreadLocalPtr
// dies AFTER the main thread dies: When a child thread happens to use
// ThreadLocalPtr, it will try to delete its thread-local data on its
// OnThreadExit when the child thread dies. However, OnThreadExit depends
// on the following variable. As a result, if the main thread dies before any
// child thread happen to use ThreadLocalPtr dies, then the destruction of
// the following variable will go first, then OnThreadExit, therefore causing
// invalid access.
//
// The above problem can be solved by using thread_local to store tls_ instead
// of using __thread. The major difference between thread_local and __thread
// is that thread_local supports dynamic construction and destruction of
// non-primitive typed variables. As a result, we can guarantee the
// desturction order even when the main thread dies before any child threads.
// However, thread_local requires gcc 4.8 and is not supported in all the
// compilers that accepts -std=c++11 (e.g., the default clang on Mac), while
// the current RocksDB still accept gcc 4.7.
static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta();
return inst;
}
void ThreadLocalPtr::StaticMeta::InitSingletons() { Mutex(); }
port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; }
void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
auto* tls = static_cast<ThreadData*>(ptr);
assert(tls != nullptr);
// Use the cached StaticMeta::Instance() instead of directly calling
// the variable inside StaticMeta::Instance() might already go out of
// scope here in case this OnThreadExit is called after the main thread
// dies.
auto* inst = tls->inst;
pthread_setspecific(inst->pthread_key_, nullptr);
MutexLock l(inst->MemberMutex());
inst->RemoveThreadData(tls);
// Unref stored pointers of current thread from all instances
uint32_t id = 0;
for (auto& e : tls->entries) {
void* raw = e.ptr.load();
if (raw != nullptr) {
auto unref = inst->GetHandler(id);
if (unref != nullptr) {
unref(raw);
}
}
++id;
}
// Delete thread local structure no matter if it is Mac platform
delete tls;
}
ThreadLocalPtr::StaticMeta::StaticMeta() : next_instance_id_(0), head_(this) {
if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) {
abort();
}
// OnThreadExit is not getting called on the main thread.
// Call through the static destructor mechanism to avoid memory leak.
//
// Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
// singleton (destructors are invoked in reverse order of constructor
// _completion_); the latter must not mutate internal members. This
// cleanup mechanism inherently relies on use-after-release of the
// StaticMeta, and is brittle with respect to compiler-specific handling
// of memory backing destructed statically-scoped objects. Perhaps
// registering with atexit(3) would be more robust.
//
// This is not required on Windows.
#if !defined(OS_WIN)
static struct A {
~A() {
#if !(ROCKSDB_SUPPORT_THREAD_LOCAL)
ThreadData* tls_ =
static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
#endif
if (tls_) {
OnThreadExit(tls_);
}
}
} a;
#endif // !defined(OS_WIN)
head_.next = &head_;
head_.prev = &head_;
#ifdef OS_WIN
// Share with Windows its cleanup routine and the key
wintlscleanup::thread_local_inclass_routine = OnThreadExit;
wintlscleanup::thread_local_key = pthread_key_;
#endif
}
void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadLocalPtr::ThreadData* d) {
Mutex()->AssertHeld();
d->next = &head_;
d->prev = head_.prev;
head_.prev->next = d;
head_.prev = d;
}
void ThreadLocalPtr::StaticMeta::RemoveThreadData(
ThreadLocalPtr::ThreadData* d) {
Mutex()->AssertHeld();
d->next->prev = d->prev;
d->prev->next = d->next;
d->next = d->prev = d;
}
ThreadLocalPtr::ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
#if !(ROCKSDB_SUPPORT_THREAD_LOCAL)
// Make this local variable name look like a member variable so that we
// can share all the code below
ThreadData* tls_ =
static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
#endif
if (UNLIKELY(tls_ == nullptr)) {
auto* inst = Instance();
tls_ = new ThreadData(inst);
{
// Register it in the global chain, needs to be done before thread exit
// handler registration
MutexLock l(Mutex());
inst->AddThreadData(tls_);
}
// Even it is not OS_MACOSX, need to register value for pthread_key_ so that
// its exit handler will be triggered.
if (pthread_setspecific(inst->pthread_key_, tls_) != 0) {
{
MutexLock l(Mutex());
inst->RemoveThreadData(tls_);
}
delete tls_;
abort();
}
}
return tls_;
}
void* ThreadLocalPtr::StaticMeta::Get(uint32_t id) const {
auto* tls = GetThreadLocal();
if (UNLIKELY(id >= tls->entries.size())) {
return nullptr;
}
return tls->entries[id].ptr.load(std::memory_order_acquire);
}
void ThreadLocalPtr::StaticMeta::Reset(uint32_t id, void* ptr) {
auto* tls = GetThreadLocal();
if (UNLIKELY(id >= tls->entries.size())) {
// Need mutex to protect entries access within ReclaimId
MutexLock l(Mutex());
tls->entries.resize(id + 1);
}
tls->entries[id].ptr.store(ptr, std::memory_order_release);
}
void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id, void* ptr) {
auto* tls = GetThreadLocal();
if (UNLIKELY(id >= tls->entries.size())) {
// Need mutex to protect entries access within ReclaimId
MutexLock l(Mutex());
tls->entries.resize(id + 1);
}
return tls->entries[id].ptr.exchange(ptr, std::memory_order_acquire);
}
bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id, void* ptr,
void*& expected) {
auto* tls = GetThreadLocal();
if (UNLIKELY(id >= tls->entries.size())) {
// Need mutex to protect entries access within ReclaimId
MutexLock l(Mutex());
tls->entries.resize(id + 1);
}
return tls->entries[id].ptr.compare_exchange_strong(
expected, ptr, std::memory_order_release, std::memory_order_relaxed);
}
void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id, autovector<void*>* ptrs,
void* const replacement) {
MutexLock l(Mutex());
for (ThreadData* t = head_.next; t != &head_; t = t->next) {
if (id < t->entries.size()) {
void* ptr =
t->entries[id].ptr.exchange(replacement, std::memory_order_acquire);
if (ptr != nullptr) {
ptrs->push_back(ptr);
}
}
}
}
void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id, UnrefHandler handler) {
MutexLock l(Mutex());
handler_map_[id] = handler;
}
UnrefHandler ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id) {
Mutex()->AssertHeld();
auto iter = handler_map_.find(id);
if (iter == handler_map_.end()) {
return nullptr;
}
return iter->second;
}
uint32_t ThreadLocalPtr::StaticMeta::GetId() {
MutexLock l(Mutex());
if (free_instance_ids_.empty()) {
return next_instance_id_++;
}
uint32_t id = free_instance_ids_.back();
free_instance_ids_.pop_back();
return id;
}
uint32_t ThreadLocalPtr::StaticMeta::PeekId() const {
MutexLock l(Mutex());
if (!free_instance_ids_.empty()) {
return free_instance_ids_.back();
}
return next_instance_id_;
}
void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) {
// This id is not used, go through all thread local data and release
// corresponding value
MutexLock l(Mutex());
auto unref = GetHandler(id);
for (ThreadData* t = head_.next; t != &head_; t = t->next) {
if (id < t->entries.size()) {
void* ptr = t->entries[id].ptr.exchange(nullptr);
if (ptr != nullptr && unref != nullptr) {
unref(ptr);
}
}
}
handler_map_[id] = nullptr;
free_instance_ids_.push_back(id);
}
ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler)
: id_(Instance()->GetId()) {
if (handler != nullptr) {
Instance()->SetHandler(id_, handler);
}
}
ThreadLocalPtr::~ThreadLocalPtr() {
Instance()->ReclaimId(id_);
}
void* ThreadLocalPtr::Get() const {
return Instance()->Get(id_);
}
void ThreadLocalPtr::Reset(void* ptr) {
Instance()->Reset(id_, ptr);
}
void* ThreadLocalPtr::Swap(void* ptr) {
return Instance()->Swap(id_, ptr);
}
bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) {
return Instance()->CompareAndSwap(id_, ptr, expected);
}
void ThreadLocalPtr::Scrape(autovector<void*>* ptrs, void* const replacement) {
Instance()->Scrape(id_, ptrs, replacement);
}
} // namespace rocksdb