Pass logger to memtable rep so TLB page allocation errors are logged to info logs

Summary: Huge TLB page allocation errors are now logged to the info logs instead of stderr. To make that possible, the memtable rep factory functions now take an info logger as an extra parameter.

Test Plan: make all check

Reviewers: haobo, igor, yhchiang

Reviewed By: yhchiang

CC: leveldb, yhchiang, dhruba

Differential Revision: https://reviews.facebook.net/D18471
commit 3a171dcb51 (parent 044af85847)
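For downstream code that implements its own MemTableRepFactory, the pure-virtual CreateMemTableRep() signature below gains a trailing Logger* parameter. A minimal sketch of an updated subclass, assuming only the public headers changed in this diff; the wrapper class itself is hypothetical:

#include "rocksdb/memtablerep.h"

// Hypothetical factory that delegates to the built-in SkipListFactory
// while forwarding the new Logger* parameter, so that huge TLB page
// allocation failures inside the rep reach the info log.
class LoggingSkipListFactory : public rocksdb::MemTableRepFactory {
 public:
  virtual rocksdb::MemTableRep* CreateMemTableRep(
      const rocksdb::MemTableRep::KeyComparator& compare,
      rocksdb::Arena* arena, const rocksdb::SliceTransform* transform,
      rocksdb::Logger* logger) override {
    // A nullptr logger keeps the old print-to-stderr behavior.
    return base_.CreateMemTableRep(compare, arena, transform, logger);
  }
  virtual const char* Name() const override {
    return "LoggingSkipListFactory";
  }

 private:
  rocksdb::SkipListFactory base_;
};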
@@ -5,6 +5,7 @@
 ### Public API changes
 * Added _LEVEL to all InfoLogLevel enums
 * Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
+* MemTableRepFactory::CreateMemTableRep() takes info logger as an extra parameter.

 ### New Features
 * Column family support
@@ -37,7 +37,8 @@ MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options)
       kWriteBufferSize(options.write_buffer_size),
       arena_(options.arena_block_size),
       table_(options.memtable_factory->CreateMemTableRep(
-          comparator_, &arena_, options.prefix_extractor.get())),
+          comparator_, &arena_, options.prefix_extractor.get(),
+          options.info_log.get())),
       num_entries_(0),
       flush_in_progress_(false),
       flush_completed_(false),
@@ -55,7 +56,8 @@ MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options)
     prefix_bloom_.reset(new DynamicBloom(
         options.memtable_prefix_bloom_bits, options.bloom_locality,
         options.memtable_prefix_bloom_probes, nullptr,
-        options.memtable_prefix_bloom_huge_page_tlb_size));
+        options.memtable_prefix_bloom_huge_page_tlb_size,
+        options.info_log.get()));
   }
 }

@@ -44,6 +44,7 @@ class Arena;
 class LookupKey;
 class Slice;
 class SliceTransform;
+class Logger;

 typedef void* KeyHandle;

@@ -174,7 +175,8 @@ class MemTableRepFactory {
  public:
   virtual ~MemTableRepFactory() {}
   virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
-                                         Arena*, const SliceTransform*) = 0;
+                                         Arena*, const SliceTransform*,
+                                         Logger* logger) = 0;
   virtual const char* Name() const = 0;
 };

@@ -182,8 +184,8 @@ class MemTableRepFactory {
 class SkipListFactory : public MemTableRepFactory {
  public:
   virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
-                                         Arena*,
-                                         const SliceTransform*) override;
+                                         Arena*, const SliceTransform*,
+                                         Logger* logger) override;
   virtual const char* Name() const override { return "SkipListFactory"; }
 };

@@ -201,9 +203,9 @@ class VectorRepFactory : public MemTableRepFactory {

  public:
   explicit VectorRepFactory(size_t count = 0) : count_(count) { }
-  virtual MemTableRep* CreateMemTableRep(
-      const MemTableRep::KeyComparator&, Arena*,
-      const SliceTransform*) override;
+  virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
+                                         Arena*, const SliceTransform*,
+                                         Logger* logger) override;
   virtual const char* Name() const override {
     return "VectorRepFactory";
   }

@@ -272,7 +272,8 @@ void PlainTableReader::AllocateIndexAndBloom(int num_prefixes) {
   uint32_t bloom_total_bits = num_prefixes * kBloomBitsPerKey;
   if (bloom_total_bits > 0) {
     bloom_.reset(new DynamicBloom(bloom_total_bits, options_.bloom_locality,
-                                  6, nullptr, huge_page_tlb_size_));
+                                  6, nullptr, huge_page_tlb_size_,
+                                  options_.info_log.get()));
   }
 }

@@ -328,8 +329,8 @@ void PlainTableReader::FillIndexes(
   Log(options_.info_log, "Reserving %zu bytes for plain table's sub_index",
       kSubIndexSize);
   auto total_allocate_size = sizeof(uint32_t) * index_size_ + kSubIndexSize;
-  char* allocated =
-      arena_.AllocateAligned(total_allocate_size, huge_page_tlb_size_);
+  char* allocated = arena_.AllocateAligned(
+      total_allocate_size, huge_page_tlb_size_, options_.info_log.get());
   index_ = reinterpret_cast<uint32_t*>(allocated);
   sub_index_ = allocated + sizeof(uint32_t) * index_size_;

@@ -398,7 +399,8 @@ Status PlainTableReader::PopulateIndex(TableProperties* props) {
   uint32_t num_bloom_bits = table_properties_->num_entries * kBloomBitsPerKey;
   if (num_bloom_bits > 0) {
     bloom_.reset(new DynamicBloom(num_bloom_bits, options_.bloom_locality, 6,
-                                  nullptr, huge_page_tlb_size_));
+                                  nullptr, huge_page_tlb_size_,
+                                  options_.info_log.get()));
   }
 }

@@ -10,6 +10,7 @@
 #include "util/arena.h"
 #include <sys/mman.h>
 #include <algorithm>
+#include "rocksdb/env.h"

 namespace rocksdb {

@@ -70,20 +71,23 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) {
   }
 }

-char* Arena::AllocateAligned(size_t bytes, size_t huge_page_tlb_size) {
+char* Arena::AllocateAligned(size_t bytes, size_t huge_page_tlb_size,
+                             Logger* logger) {
   assert((kAlignUnit & (kAlignUnit - 1)) ==
          0);  // Pointer size should be a power of 2

 #ifdef OS_LINUX
   if (huge_page_tlb_size > 0 && bytes > 0) {
     // Allocate from a huge page TLB table.
+    assert(logger != nullptr);  // a logger needs to be passed in.
     size_t reserved_size =
         ((bytes - 1U) / huge_page_tlb_size + 1U) * huge_page_tlb_size;
     assert(reserved_size >= bytes);
     void* addr = mmap(nullptr, reserved_size, (PROT_READ | PROT_WRITE),
                       (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), 0, 0);
     if (addr == MAP_FAILED) {
       // TODO(sdong): Better handling
+      Warn(logger, "AllocateAligned fail to allocate huge TLB pages: %s",
+           strerror(errno));
       // fall back to malloc
     } else {
       blocks_memory_ += reserved_size;
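The reserved_size computation above rounds the request up to a whole number of huge pages before calling mmap. A worked instance, assuming 2 MiB huge pages (the common x86-64 size):

// bytes = 5 MiB, huge_page_tlb_size = 2 MiB
// reserved_size = ((5M - 1) / 2M + 1) * 2M = (2 + 1) * 2M = 6 MiB
// so the assert reserved_size >= bytes always holds, and mmap is asked
// for a whole number of MAP_HUGETLB pages.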
@@ -20,6 +20,8 @@

 namespace rocksdb {

+class Logger;
+
 class Arena {
  public:
   // No copying allowed
@@ -41,7 +43,12 @@ class Arena {
   // huge pages for it to be allocated, like:
   //     sysctl -w vm.nr_hugepages=20
   // See linux doc Documentation/vm/hugetlbpage.txt for details.
-  char* AllocateAligned(size_t bytes, size_t huge_page_tlb_size = 0);
+  // Huge page allocation can fail. In this case it will fall back to
+  // normal allocation. The message will be logged to logger. So when calling
+  // with huge_page_tlb_size > 0, we highly recommend a logger is passed in.
+  // Otherwise, the error message will be printed out to stderr directly.
+  char* AllocateAligned(size_t bytes, size_t huge_page_tlb_size = 0,
+                        Logger* logger = nullptr);

   // Returns an estimate of the total memory usage of data allocated
   // by the arena (exclude the space allocated but not yet used for future
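A usage sketch for the new overload; the arena and opts objects are assumed to exist in the caller:

// Request 1 MiB of aligned memory, backed by 2 MiB huge pages when
// available; if the mmap fails, a warning goes to opts.info_log and the
// call falls back to a normal allocation.
char* buf = arena.AllocateAligned(1 << 20, 2 * 1024 * 1024,
                                  opts.info_log.get());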
@@ -22,7 +22,7 @@ static uint32_t BloomHash(const Slice& key) {
 DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t cl_per_block,
                            uint32_t num_probes,
                            uint32_t (*hash_func)(const Slice& key),
-                           size_t huge_page_tlb_size)
+                           size_t huge_page_tlb_size, Logger* logger)
     : kBlocked(cl_per_block > 0),
       kBitsPerBlock(std::min(cl_per_block, num_probes) * CACHE_LINE_SIZE * 8),
       kTotalBits((kBlocked ? (total_bits + kBitsPerBlock - 1) / kBitsPerBlock *
@@ -40,7 +40,7 @@ DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t cl_per_block,
     sz += CACHE_LINE_SIZE - 1;
   }
   raw_ = reinterpret_cast<unsigned char*>(
-      arena_.AllocateAligned(sz, huge_page_tlb_size));
+      arena_.AllocateAligned(sz, huge_page_tlb_size, logger));
   memset(raw_, 0, sz);
   if (kBlocked && (reinterpret_cast<uint64_t>(raw_) % CACHE_LINE_SIZE)) {
     data_ = raw_ + CACHE_LINE_SIZE -

@@ -13,6 +13,7 @@
 namespace rocksdb {

 class Slice;
+class Logger;

 class DynamicBloom {
  public:
@@ -29,7 +30,8 @@ class DynamicBloom {
   explicit DynamicBloom(uint32_t total_bits, uint32_t cl_per_block = 0,
                         uint32_t num_probes = 6,
                         uint32_t (*hash_func)(const Slice& key) = nullptr,
-                        size_t huge_page_tlb_size = 0);
+                        size_t huge_page_tlb_size = 0,
+                        Logger* logger = nullptr);

   ~DynamicBloom() {}

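The memtable prefix bloom construction in the memtable.cc hunk above is the main caller of this widened constructor; a standalone sketch, with an assumed opts object:

rocksdb::DynamicBloom bloom(
    opts.memtable_prefix_bloom_bits, opts.bloom_locality,
    opts.memtable_prefix_bloom_probes, nullptr /* default hash */,
    opts.memtable_prefix_bloom_huge_page_tlb_size,
    opts.info_log.get());  // failures now logged, not printed to stderr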
@@ -314,7 +314,8 @@ void HashCuckooRep::Insert(KeyHandle handle) {
     // immutable.
     if (backup_table_.get() == nullptr) {
       VectorRepFactory factory(10);
-      backup_table_.reset(factory.CreateMemTableRep(compare_, arena_, nullptr));
+      backup_table_.reset(
+          factory.CreateMemTableRep(compare_, arena_, nullptr, nullptr));
       is_nearly_full_ = true;
     }
     backup_table_->Insert(key);
@@ -595,7 +596,7 @@ void HashCuckooRep::Iterator::SeekToLast() {

 MemTableRep* HashCuckooRepFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform* transform) {
+    const SliceTransform* transform, Logger* logger) {
   // The estimated average fullness. The write performance of any close hash
   // degrades as the fullness of the mem-table increases. Setting kFullness
   // to a value around 0.7 can better avoid write performance degradation while

@@ -29,7 +29,7 @@ class HashCuckooRepFactory : public MemTableRepFactory {

   virtual MemTableRep* CreateMemTableRep(
       const MemTableRep::KeyComparator& compare, Arena* arena,
-      const SliceTransform* transform) override;
+      const SliceTransform* transform, Logger* logger) override;

   virtual const char* Name() const override { return "HashCuckooRepFactory"; }

@@ -54,7 +54,7 @@ class HashLinkListRep : public MemTableRep {
  public:
   HashLinkListRep(const MemTableRep::KeyComparator& compare, Arena* arena,
                   const SliceTransform* transform, size_t bucket_size,
-                  size_t huge_page_tlb_size);
+                  size_t huge_page_tlb_size, Logger* logger);

   virtual KeyHandle Allocate(const size_t len, char** buf) override;

@@ -307,13 +307,14 @@ class HashLinkListRep : public MemTableRep {

 HashLinkListRep::HashLinkListRep(const MemTableRep::KeyComparator& compare,
                                  Arena* arena, const SliceTransform* transform,
-                                 size_t bucket_size, size_t huge_page_tlb_size)
+                                 size_t bucket_size, size_t huge_page_tlb_size,
+                                 Logger* logger)
     : MemTableRep(arena),
       bucket_size_(bucket_size),
       transform_(transform),
       compare_(compare) {
   char* mem = arena_->AllocateAligned(sizeof(port::AtomicPointer) * bucket_size,
-                                      huge_page_tlb_size);
+                                      huge_page_tlb_size, logger);

   buckets_ = new (mem) port::AtomicPointer[bucket_size];

@@ -469,9 +470,9 @@ Node* HashLinkListRep::FindGreaterOrEqualInBucket(Node* head,

 MemTableRep* HashLinkListRepFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform* transform) {
+    const SliceTransform* transform, Logger* logger) {
   return new HashLinkListRep(compare, arena, transform, bucket_count_,
-                             huge_page_tlb_size_);
+                             huge_page_tlb_size_, logger);
 }

 MemTableRepFactory* NewHashLinkListRepFactory(size_t bucket_count,
@@ -23,7 +23,7 @@ class HashLinkListRepFactory : public MemTableRepFactory {

   virtual MemTableRep* CreateMemTableRep(
       const MemTableRep::KeyComparator& compare, Arena* arena,
-      const SliceTransform* transform) override;
+      const SliceTransform* transform, Logger* logger) override;

   virtual const char* Name() const override {
     return "HashLinkListRepFactory";
@@ -320,7 +320,7 @@ MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator() {

 MemTableRep* HashSkipListRepFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform* transform) {
+    const SliceTransform* transform, Logger* logger) {
   return new HashSkipListRep(compare, arena, transform, bucket_count_,
                              skiplist_height_, skiplist_branching_factor_);
 }

@@ -27,7 +27,7 @@ class HashSkipListRepFactory : public MemTableRepFactory {

   virtual MemTableRep* CreateMemTableRep(
       const MemTableRep::KeyComparator& compare, Arena* arena,
-      const SliceTransform* transform) override;
+      const SliceTransform* transform, Logger* logger) override;

   virtual const char* Name() const override {
     return "HashSkipListRepFactory";
@@ -116,7 +116,7 @@ public:

 MemTableRep* SkipListFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform*) {
+    const SliceTransform*, Logger* logger) {
   return new SkipListRep(compare, arena);
 }

@@ -275,7 +275,7 @@ MemTableRep::Iterator* VectorRep::GetIterator() {

 MemTableRep* VectorRepFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform*) {
+    const SliceTransform*, Logger* logger) {
   return new VectorRep(compare, arena, count_);
 }
 }  // namespace rocksdb
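Taken together, the change is transparent to callers of the built-in factories, since the logger is plumbed through from Options. A hedged end-to-end sketch (the database path is hypothetical, and the default info logger created by Open() is assumed to be used when options.info_log is unset):

#include "rocksdb/db.h"
#include "rocksdb/memtablerep.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.memtable_factory.reset(new rocksdb::SkipListFactory);
  rocksdb::DB* db;
  // Open() sanitizes options; the memtable rep and its arena now receive
  // options.info_log, so huge TLB page errors land in the info log.
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/testdb", &db);
  if (s.ok()) {
    delete db;
  }
  return 0;
}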