Pass logger to memtable rep and log TLB page allocation errors to the info log

Summary:
TLB page allocation errors are now logged to the info log instead of stderr.
To support this, the memtable rep factory functions now take an info logger.
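
For third-party code this is a breaking change: any custom MemTableRepFactory subclass must add the new Logger* parameter to its CreateMemTableRep() override. A minimal sketch of the updated override (the factory name is hypothetical; it simply delegates to the built-in skip list factory):

#include "rocksdb/memtablerep.h"

class MyRepFactory : public rocksdb::MemTableRepFactory {
 public:
  // Logger* is the new parameter; factories that never allocate huge TLB
  // pages can ignore it or simply pass it through.
  virtual rocksdb::MemTableRep* CreateMemTableRep(
      const rocksdb::MemTableRep::KeyComparator& compare,
      rocksdb::Arena* arena, const rocksdb::SliceTransform* transform,
      rocksdb::Logger* logger) override {
    rocksdb::SkipListFactory fallback;
    return fallback.CreateMemTableRep(compare, arena, transform, logger);
  }
  virtual const char* Name() const override { return "MyRepFactory"; }
};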

Test Plan: make all check

Reviewers: haobo, igor, yhchiang

Reviewed By: yhchiang

CC: leveldb, yhchiang, dhruba

Differential Revision: https://reviews.facebook.net/D18471
sdong 2014-05-04 15:52:23 -07:00
parent 044af85847
commit 3a171dcb51
16 changed files with 53 additions and 31 deletions

View File

@@ -5,6 +5,7 @@
 ### Public API changes
 * Added _LEVEL to all InfoLogLevel enums
 * Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
+* MemTableRepFactory::CreateMemTableRep() takes info logger as an extra parameter.
 ### New Features
 * Column family support

View File

@@ -37,7 +37,8 @@ MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options)
       kWriteBufferSize(options.write_buffer_size),
       arena_(options.arena_block_size),
       table_(options.memtable_factory->CreateMemTableRep(
-          comparator_, &arena_, options.prefix_extractor.get())),
+          comparator_, &arena_, options.prefix_extractor.get(),
+          options.info_log.get())),
       num_entries_(0),
       flush_in_progress_(false),
       flush_completed_(false),
@@ -55,7 +56,8 @@ MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options)
     prefix_bloom_.reset(new DynamicBloom(
         options.memtable_prefix_bloom_bits, options.bloom_locality,
         options.memtable_prefix_bloom_probes, nullptr,
-        options.memtable_prefix_bloom_huge_page_tlb_size));
+        options.memtable_prefix_bloom_huge_page_tlb_size,
+        options.info_log.get()));
   }
 }

View File

@@ -44,6 +44,7 @@ class Arena;
 class LookupKey;
 class Slice;
 class SliceTransform;
+class Logger;

 typedef void* KeyHandle;
@@ -174,7 +175,8 @@ class MemTableRepFactory {
  public:
   virtual ~MemTableRepFactory() {}
   virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
-                                         Arena*, const SliceTransform*) = 0;
+                                         Arena*, const SliceTransform*,
+                                         Logger* logger) = 0;
   virtual const char* Name() const = 0;
 };
@@ -182,8 +184,8 @@ class MemTableRepFactory {
 class SkipListFactory : public MemTableRepFactory {
  public:
   virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
-                                         Arena*,
-                                         const SliceTransform*) override;
+                                         Arena*, const SliceTransform*,
+                                         Logger* logger) override;
   virtual const char* Name() const override { return "SkipListFactory"; }
 };
@@ -201,9 +203,9 @@ class VectorRepFactory : public MemTableRepFactory {
  public:
   explicit VectorRepFactory(size_t count = 0) : count_(count) { }
-  virtual MemTableRep* CreateMemTableRep(
-      const MemTableRep::KeyComparator&, Arena*,
-      const SliceTransform*) override;
+  virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
+                                         Arena*, const SliceTransform*,
+                                         Logger* logger) override;
   virtual const char* Name() const override {
     return "VectorRepFactory";
   }

View File

@@ -272,7 +272,8 @@ void PlainTableReader::AllocateIndexAndBloom(int num_prefixes) {
     uint32_t bloom_total_bits = num_prefixes * kBloomBitsPerKey;
     if (bloom_total_bits > 0) {
       bloom_.reset(new DynamicBloom(bloom_total_bits, options_.bloom_locality,
-                                    6, nullptr, huge_page_tlb_size_));
+                                    6, nullptr, huge_page_tlb_size_,
+                                    options_.info_log.get()));
     }
   }
@@ -328,8 +329,8 @@ void PlainTableReader::FillIndexes(
   Log(options_.info_log, "Reserving %zu bytes for plain table's sub_index",
       kSubIndexSize);
   auto total_allocate_size = sizeof(uint32_t) * index_size_ + kSubIndexSize;
-  char* allocated =
-      arena_.AllocateAligned(total_allocate_size, huge_page_tlb_size_);
+  char* allocated = arena_.AllocateAligned(
+      total_allocate_size, huge_page_tlb_size_, options_.info_log.get());
   index_ = reinterpret_cast<uint32_t*>(allocated);
   sub_index_ = allocated + sizeof(uint32_t) * index_size_;
@@ -398,7 +399,8 @@ Status PlainTableReader::PopulateIndex(TableProperties* props) {
   uint32_t num_bloom_bits = table_properties_->num_entries * kBloomBitsPerKey;
   if (num_bloom_bits > 0) {
     bloom_.reset(new DynamicBloom(num_bloom_bits, options_.bloom_locality, 6,
-                                  nullptr, huge_page_tlb_size_));
+                                  nullptr, huge_page_tlb_size_,
+                                  options_.info_log.get()));
   }
 }

View File

@@ -10,6 +10,7 @@
 #include "util/arena.h"
 #include <sys/mman.h>
 #include <algorithm>
+#include "rocksdb/env.h"

 namespace rocksdb {
@@ -70,20 +71,23 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) {
   }
 }

-char* Arena::AllocateAligned(size_t bytes, size_t huge_page_tlb_size) {
+char* Arena::AllocateAligned(size_t bytes, size_t huge_page_tlb_size,
+                             Logger* logger) {
   assert((kAlignUnit & (kAlignUnit - 1)) ==
          0);  // Pointer size should be a power of 2
 #ifdef OS_LINUX
   if (huge_page_tlb_size > 0 && bytes > 0) {
     // Allocate from huge TLB pages.
+    assert(logger != nullptr);  // logger needs to be passed in.
     size_t reserved_size =
         ((bytes - 1U) / huge_page_tlb_size + 1U) * huge_page_tlb_size;
     assert(reserved_size >= bytes);
     void* addr = mmap(nullptr, reserved_size, (PROT_READ | PROT_WRITE),
                       (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), 0, 0);
     if (addr == MAP_FAILED) {
-      // TODO(sdong): Better handling
+      Warn(logger, "AllocateAligned fail to allocate huge TLB pages: %s",
+           strerror(errno));
       // fall back to malloc
     } else {
       blocks_memory_ += reserved_size;
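
For readers unfamiliar with MAP_HUGETLB: the Linux-only path above rounds the request up to a whole number of huge pages, asks the kernel for an anonymous huge-page mapping, and falls back to the normal allocator when the mapping fails. A simplified standalone sketch of that pattern, not the RocksDB code itself:

#include <sys/mman.h>

#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstring>

// Returns a huge-page-backed buffer of at least `bytes`, or nullptr if the
// kernel cannot satisfy the request; the caller then falls back to malloc.
char* TryHugePageAlloc(size_t bytes, size_t huge_page_size) {
  // Round up to a whole number of huge pages, as AllocateAligned does.
  size_t reserved = ((bytes - 1) / huge_page_size + 1) * huge_page_size;
  void* addr = mmap(nullptr, reserved, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
  if (addr == MAP_FAILED) {
    // Typically ENOMEM when vm.nr_hugepages is zero or exhausted.
    std::fprintf(stderr, "huge page mmap failed: %s\n", std::strerror(errno));
    return nullptr;
  }
  return static_cast<char*>(addr);
}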

View File

@@ -20,6 +20,8 @@
 namespace rocksdb {

+class Logger;
+
 class Arena {
  public:
   // No copying allowed
@@ -41,7 +43,12 @@ class Arena {
   // huge pages for it to be allocated, like:
   // sysctl -w vm.nr_hugepages=20
   // See linux doc Documentation/vm/hugetlbpage.txt for details.
-  char* AllocateAligned(size_t bytes, size_t huge_page_tlb_size = 0);
+  // Huge page allocation can fail. In that case it falls back to normal
+  // allocation, and the error message is logged to logger. So when calling
+  // with huge_page_tlb_size > 0, we highly recommend passing in a logger;
+  // otherwise, the error message is printed directly to stderr.
+  char* AllocateAligned(size_t bytes, size_t huge_page_tlb_size = 0,
+                        Logger* logger = nullptr);

   // Returns an estimate of the total memory usage of data allocated
   // by the arena (exclude the space allocated but not yet used for future
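
To make the recommended usage concrete, here is a minimal, hypothetical call site (2MB is the common x86-64 huge page size; `arena` is an Arena and `logger` a Logger* assumed to be in scope):

// Request 1MB backed by 2MB huge TLB pages. If the huge-page mmap fails,
// the arena warns through `logger` and falls back to normal allocation.
char* buf = arena.AllocateAligned(1024 * 1024 /* bytes */,
                                  2 * 1024 * 1024 /* huge_page_tlb_size */,
                                  logger);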

View File

@@ -22,7 +22,7 @@ static uint32_t BloomHash(const Slice& key) {
 DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t cl_per_block,
                            uint32_t num_probes,
                            uint32_t (*hash_func)(const Slice& key),
-                           size_t huge_page_tlb_size)
+                           size_t huge_page_tlb_size, Logger* logger)
     : kBlocked(cl_per_block > 0),
       kBitsPerBlock(std::min(cl_per_block, num_probes) * CACHE_LINE_SIZE * 8),
       kTotalBits((kBlocked ? (total_bits + kBitsPerBlock - 1) / kBitsPerBlock *
@@ -40,7 +40,7 @@ DynamicBloom::DynamicBloom(uint32_t total_bits, uint32_t cl_per_block,
     sz += CACHE_LINE_SIZE - 1;
   }
   raw_ = reinterpret_cast<unsigned char*>(
-      arena_.AllocateAligned(sz, huge_page_tlb_size));
+      arena_.AllocateAligned(sz, huge_page_tlb_size, logger));
   memset(raw_, 0, sz);
   if (kBlocked && (reinterpret_cast<uint64_t>(raw_) % CACHE_LINE_SIZE)) {
     data_ = raw_ + CACHE_LINE_SIZE -

View File

@@ -13,6 +13,7 @@
 namespace rocksdb {

 class Slice;
+class Logger;

 class DynamicBloom {
  public:
@@ -29,7 +30,8 @@ class DynamicBloom {
   explicit DynamicBloom(uint32_t total_bits, uint32_t cl_per_block = 0,
                         uint32_t num_probes = 6,
                         uint32_t (*hash_func)(const Slice& key) = nullptr,
-                        size_t huge_page_tlb_size = 0);
+                        size_t huge_page_tlb_size = 0,
+                        Logger* logger = nullptr);

   ~DynamicBloom() {}

View File

@@ -314,7 +314,8 @@ void HashCuckooRep::Insert(KeyHandle handle) {
     // immutable.
     if (backup_table_.get() == nullptr) {
       VectorRepFactory factory(10);
-      backup_table_.reset(factory.CreateMemTableRep(compare_, arena_, nullptr));
+      backup_table_.reset(
+          factory.CreateMemTableRep(compare_, arena_, nullptr, nullptr));
       is_nearly_full_ = true;
     }
     backup_table_->Insert(key);
@@ -595,7 +596,7 @@ void HashCuckooRep::Iterator::SeekToLast() {
 MemTableRep* HashCuckooRepFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform* transform) {
+    const SliceTransform* transform, Logger* logger) {
   // The estimated average fullness. The write performance of any close hash
   // degrades as the fullness of the mem-table increases. Setting kFullness
   // to a value around 0.7 can better avoid write performance degradation while

View File

@@ -29,7 +29,7 @@ class HashCuckooRepFactory : public MemTableRepFactory {
   virtual MemTableRep* CreateMemTableRep(
       const MemTableRep::KeyComparator& compare, Arena* arena,
-      const SliceTransform* transform) override;
+      const SliceTransform* transform, Logger* logger) override;

   virtual const char* Name() const override { return "HashCuckooRepFactory"; }

View File

@@ -54,7 +54,7 @@ class HashLinkListRep : public MemTableRep {
  public:
   HashLinkListRep(const MemTableRep::KeyComparator& compare, Arena* arena,
                   const SliceTransform* transform, size_t bucket_size,
-                  size_t huge_page_tlb_size);
+                  size_t huge_page_tlb_size, Logger* logger);

   virtual KeyHandle Allocate(const size_t len, char** buf) override;
@@ -307,13 +307,14 @@ class HashLinkListRep : public MemTableRep {
 HashLinkListRep::HashLinkListRep(const MemTableRep::KeyComparator& compare,
                                  Arena* arena, const SliceTransform* transform,
-                                 size_t bucket_size, size_t huge_page_tlb_size)
+                                 size_t bucket_size, size_t huge_page_tlb_size,
+                                 Logger* logger)
     : MemTableRep(arena),
       bucket_size_(bucket_size),
       transform_(transform),
       compare_(compare) {
   char* mem = arena_->AllocateAligned(sizeof(port::AtomicPointer) * bucket_size,
-                                      huge_page_tlb_size);
+                                      huge_page_tlb_size, logger);
   buckets_ = new (mem) port::AtomicPointer[bucket_size];
@@ -469,9 +470,9 @@ Node* HashLinkListRep::FindGreaterOrEqualInBucket(Node* head,
 MemTableRep* HashLinkListRepFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform* transform) {
+    const SliceTransform* transform, Logger* logger) {
   return new HashLinkListRep(compare, arena, transform, bucket_count_,
-                             huge_page_tlb_size_);
+                             huge_page_tlb_size_, logger);
 }

 MemTableRepFactory* NewHashLinkListRepFactory(size_t bucket_count,

View File

@@ -23,7 +23,7 @@ class HashLinkListRepFactory : public MemTableRepFactory {
   virtual MemTableRep* CreateMemTableRep(
       const MemTableRep::KeyComparator& compare, Arena* arena,
-      const SliceTransform* transform) override;
+      const SliceTransform* transform, Logger* logger) override;

   virtual const char* Name() const override {
     return "HashLinkListRepFactory";

View File

@@ -320,7 +320,7 @@ MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator() {
 MemTableRep* HashSkipListRepFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform* transform) {
+    const SliceTransform* transform, Logger* logger) {
   return new HashSkipListRep(compare, arena, transform, bucket_count_,
                              skiplist_height_, skiplist_branching_factor_);
 }

View File

@@ -27,7 +27,7 @@ class HashSkipListRepFactory : public MemTableRepFactory {
   virtual MemTableRep* CreateMemTableRep(
       const MemTableRep::KeyComparator& compare, Arena* arena,
-      const SliceTransform* transform) override;
+      const SliceTransform* transform, Logger* logger) override;

   virtual const char* Name() const override {
     return "HashSkipListRepFactory";

View File

@@ -116,7 +116,7 @@ public:
 MemTableRep* SkipListFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform*) {
+    const SliceTransform*, Logger* logger) {
   return new SkipListRep(compare, arena);
 }

View File

@@ -275,7 +275,7 @@ MemTableRep::Iterator* VectorRep::GetIterator() {
 MemTableRep* VectorRepFactory::CreateMemTableRep(
     const MemTableRep::KeyComparator& compare, Arena* arena,
-    const SliceTransform*) {
+    const SliceTransform*, Logger* logger) {
   return new VectorRep(compare, arena, count_);
 }
 }  // namespace rocksdb