249eff0f30
Summary: Since read threads do not coordinate on loading data into block cache, two threads between Lookup and Insert can end up loading and inserting the same data. This is particularly concerning with cache_index_and_filter_blocks since those are hot and more likely to be race targets if ejected from (or not pre-populated in) the cache. Particularly with moves toward disaggregated / network storage, the cost of redundant retrieval might be high, and we should at least have some hard statistics from which we can estimate impact. Example with full filter thrashing "cliff": $ ./db_bench --benchmarks=fillrandom --num=15000000 --cache_index_and_filter_blocks -bloom_bits=10 ... $ ./db_bench --db=/tmp/rocksdbtest-172704/dbbench --use_existing_db --benchmarks=readrandom,stats --num=200000 --cache_index_and_filter_blocks --cache_size=$((130 * 1024 * 1024)) --bloom_bits=10 --threads=16 -statistics 2>&1 | egrep '^rocksdb.block.cache.(.*add|.*redundant)' | grep -v compress | sort rocksdb.block.cache.add COUNT : 14181 rocksdb.block.cache.add.failures COUNT : 0 rocksdb.block.cache.add.redundant COUNT : 476 rocksdb.block.cache.data.add COUNT : 12749 rocksdb.block.cache.data.add.redundant COUNT : 18 rocksdb.block.cache.filter.add COUNT : 1003 rocksdb.block.cache.filter.add.redundant COUNT : 217 rocksdb.block.cache.index.add COUNT : 429 rocksdb.block.cache.index.add.redundant COUNT : 241 $ ./db_bench --db=/tmp/rocksdbtest-172704/dbbench --use_existing_db --benchmarks=readrandom,stats --num=200000 --cache_index_and_filter_blocks --cache_size=$((120 * 1024 * 1024)) --bloom_bits=10 --threads=16 -statistics 2>&1 | egrep '^rocksdb.block.cache.(.*add|.*redundant)' | grep -v compress | sort rocksdb.block.cache.add COUNT : 1182223 rocksdb.block.cache.add.failures COUNT : 0 rocksdb.block.cache.add.redundant COUNT : 302728 rocksdb.block.cache.data.add COUNT : 31425 rocksdb.block.cache.data.add.redundant COUNT : 12 rocksdb.block.cache.filter.add COUNT : 795455 
rocksdb.block.cache.filter.add.redundant COUNT : 130238 rocksdb.block.cache.index.add COUNT : 355343 rocksdb.block.cache.index.add.redundant COUNT : 172478 Pull Request resolved: https://github.com/facebook/rocksdb/pull/6681 Test Plan: Some manual testing (above) and unit test covering key metrics is included Reviewed By: ltamasi Differential Revision: D21134113 Pulled By: pdillinger fbshipit-source-id: c11497b5f00f4ffdfe919823904e52d0a1a91d87
145 lines
4.3 KiB
C++
145 lines
4.3 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include "rocksdb/status.h"
|
|
#include <stdio.h>
|
|
#ifdef OS_WIN
|
|
#include <string.h>
|
|
#endif
|
|
#include <cstring>
|
|
#include "port/port.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
// Duplicates the null-terminated C string `state` into a buffer obtained
// from new[] and returns a pointer to that copy. `state` must not be null,
// since its length is measured with strlen.
const char* Status::CopyState(const char* state) {
  // Buffer length, including room for the trailing null terminator.
  const size_t buf_len = std::strlen(state) + 1;
#ifdef OS_WIN
  char* const copy = new char[buf_len];
  // The return code is only read by the assert below, so toolchains other
  // than MSVC get the unused attribute on its declaration.
#if defined(_MSC_VER)
  errno_t rc;
#else
  errno_t rc __attribute__((__unused__));
#endif
  rc = strncpy_s(copy, buf_len, state, buf_len - 1);
  copy[buf_len - 1] = '\0';
  assert(rc == 0);
  return copy;
#else
  char* const copy = new char[buf_len];
  // Copying buf_len bytes takes the source's null terminator along with it.
  return std::strncpy(copy, state, buf_len);
#endif
}
|
|
|
|
// Human-readable text for each Status::SubCode, indexed by the sub-code's
// integer value. Status::ToString() appends the matching entry after the
// code prefix, so the order here has to follow the enum's order.
//
// Both the strings and the pointer slots are const: the table is read-only
// lookup data and nothing in this file reassigns an entry.
static const char* const msgs[static_cast<int>(Status::kMaxSubCode)] = {
    "",                                                   // kNone
    "Timeout Acquiring Mutex",                            // kMutexTimeout
    "Timeout waiting to lock key",                        // kLockTimeout
    "Failed to acquire lock due to max_num_locks limit",  // kLockLimit
    "No space left on device",                            // kNoSpace
    "Deadlock",                                           // kDeadlock
    "Stale file handle",                                  // kStaleFile
    "Memory limit reached",                               // kMemoryLimit
    "Space limit reached",                                // kSpaceLimit
    "No such file or directory",                          // kPathNotFound
    // KMergeOperandsInsufficientCapacity
    "Insufficient capacity for merge operands",
    // kManualCompactionPaused
    "Manual compaction paused",
    " (overwritten)",  // kOverwritten, subcode of OK
};
|
|
|
|
// Builds a non-OK status whose state string is `msg`, or "msg: msg2" when
// `msg2` is non-empty. The severity starts out as kNoError.
//
// `_code` must not be kOk and `_subcode` must not be kMaxSubCode; both are
// checked with assert.
Status::Status(Code _code, SubCode _subcode, const Slice& msg,
               const Slice& msg2)
    : code_(_code), subcode_(_subcode), sev_(kNoError) {
  assert(code_ != kOk);
  assert(subcode_ != kMaxSubCode);

  const size_t head_len = msg.size();
  const size_t tail_len = msg2.size();
  const bool has_tail = (tail_len != 0);

  // The ": " separator (2 bytes) is only emitted when there is a tail.
  size_t text_len = head_len;
  if (has_tail) {
    text_len += 2 + tail_len;
  }

  // One extra byte so the buffer is usable as a C string.
  char* const buffer = new char[text_len + 1];
  char* cursor = buffer;

  std::memcpy(cursor, msg.data(), head_len);
  cursor += head_len;

  if (has_tail) {
    *cursor++ = ':';
    *cursor++ = ' ';
    std::memcpy(cursor, msg2.data(), tail_len);
    cursor += tail_len;
  }

  *cursor = '\0';
  state_ = buffer;
}
|
|
|
|
std::string Status::ToString() const {
|
|
char tmp[30];
|
|
const char* type;
|
|
switch (code_) {
|
|
case kOk:
|
|
return "OK";
|
|
case kNotFound:
|
|
type = "NotFound: ";
|
|
break;
|
|
case kCorruption:
|
|
type = "Corruption: ";
|
|
break;
|
|
case kNotSupported:
|
|
type = "Not implemented: ";
|
|
break;
|
|
case kInvalidArgument:
|
|
type = "Invalid argument: ";
|
|
break;
|
|
case kIOError:
|
|
type = "IO error: ";
|
|
break;
|
|
case kMergeInProgress:
|
|
type = "Merge in progress: ";
|
|
break;
|
|
case kIncomplete:
|
|
type = "Result incomplete: ";
|
|
break;
|
|
case kShutdownInProgress:
|
|
type = "Shutdown in progress: ";
|
|
break;
|
|
case kTimedOut:
|
|
type = "Operation timed out: ";
|
|
break;
|
|
case kAborted:
|
|
type = "Operation aborted: ";
|
|
break;
|
|
case kBusy:
|
|
type = "Resource busy: ";
|
|
break;
|
|
case kExpired:
|
|
type = "Operation expired: ";
|
|
break;
|
|
case kTryAgain:
|
|
type = "Operation failed. Try again.: ";
|
|
break;
|
|
case kColumnFamilyDropped:
|
|
type = "Column family dropped: ";
|
|
break;
|
|
default:
|
|
snprintf(tmp, sizeof(tmp), "Unknown code(%d): ",
|
|
static_cast<int>(code()));
|
|
type = tmp;
|
|
break;
|
|
}
|
|
std::string result(type);
|
|
if (subcode_ != kNone) {
|
|
uint32_t index = static_cast<int32_t>(subcode_);
|
|
assert(sizeof(msgs) > index);
|
|
result.append(msgs[index]);
|
|
}
|
|
|
|
if (state_ != nullptr) {
|
|
result.append(state_);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|