Allow GetApproximateSizes() to include mem table size if it is a skip list memtable
Summary: Add an option to GetApproximateSizes() so that the result also includes the estimated size of the data held in mem tables. To implement it, the skip list gains a method that estimates the number of entries from the beginning of the list up to a given key. The approach is to count, while searching for the key, how many Next() calls are issued at each level, and to sum those counts with a weight of <branching factor> ^ <level>. Test Plan: Add a test case. Subscribers: leveldb, dhruba. Differential Revision: https://reviews.facebook.net/D40119
This commit is contained in:
parent
d59d90bb1f
commit
40f562e747
@ -15,6 +15,7 @@
|
||||
* WBWIIterator::Entry() now returns WriteEntry instead of `const WriteEntry&`
|
||||
* options.hard_rate_limit is deprecated.
|
||||
* When options.soft_rate_limit or options.level0_slowdown_writes_trigger is triggered, the way to slow down writes is changed to: write rate to DB is limited to options.delayed_write_rate.
|
||||
* DB::GetApproximateSizes() adds a parameter that allows the estimate to include data in the mem table; by default mem table data is not included. This is currently supported only for the skip list mem table.
|
||||
|
||||
## 3.11.0 (5/19/2015)
|
||||
### New Features
|
||||
|
@ -3881,27 +3881,26 @@ ColumnFamilyHandle* DBImpl::GetColumnFamilyHandle(uint32_t column_family_id) {
|
||||
}
|
||||
|
||||
void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family,
|
||||
const Range* range, int n, uint64_t* sizes) {
|
||||
const Range* range, int n, uint64_t* sizes,
|
||||
bool include_memtable) {
|
||||
Version* v;
|
||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
||||
auto cfd = cfh->cfd();
|
||||
{
|
||||
InstrumentedMutexLock l(&mutex_);
|
||||
v = cfd->current();
|
||||
v->Ref();
|
||||
}
|
||||
SuperVersion* sv = GetAndRefSuperVersion(cfd);
|
||||
v = sv->current;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
// Convert user_key into a corresponding internal key.
|
||||
InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
|
||||
InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
|
||||
sizes[i] = versions_->ApproximateSize(v, k1.Encode(), k2.Encode());
|
||||
if (include_memtable) {
|
||||
sizes[i] += sv->mem->ApproximateSize(k1.Encode(), k2.Encode());
|
||||
sizes[i] += sv->imm->ApproximateSize(k1.Encode(), k2.Encode());
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
InstrumentedMutexLock l(&mutex_);
|
||||
v->Unref();
|
||||
}
|
||||
ReturnAndCleanupSuperVersion(cfd, sv);
|
||||
}
|
||||
|
||||
std::list<uint64_t>::iterator
|
||||
|
@ -122,8 +122,8 @@ class DBImpl : public DB {
|
||||
const Slice& property, uint64_t* value) override;
|
||||
using DB::GetApproximateSizes;
|
||||
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
|
||||
const Range* range, int n,
|
||||
uint64_t* sizes) override;
|
||||
const Range* range, int n, uint64_t* sizes,
|
||||
bool include_memtable = false) override;
|
||||
using DB::CompactRange;
|
||||
virtual Status CompactRange(ColumnFamilyHandle* column_family,
|
||||
const Slice* begin, const Slice* end,
|
||||
|
110
db/db_test.cc
110
db/db_test.cc
@ -6577,6 +6577,112 @@ static bool Between(uint64_t val, uint64_t low, uint64_t high) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Checks DB::GetApproximateSizes() with include_memtable switched on and off:
// ranges that contain keys only present in mem tables must report a non-zero
// size when mem tables are included and zero when they are not, and ranges
// that contain no keys must report zero either way.
TEST_F(DBTest, ApproximateSizesMemTable) {
  Options options;
  options.write_buffer_size = 100000000;  // Large write buffer
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  const int N = 128;
  Random rnd(301);

  // Asks the DB for the approximate size of [Key(first), Key(last)).
  auto approx_size = [&](int first, int last, bool with_memtable) {
    const std::string lo = Key(first);
    const std::string hi = Key(last);
    Range span(lo, hi);
    uint64_t result;
    db_->GetApproximateSizes(&span, 1, &result, with_memtable);
    return result;
  };

  for (int id = 0; id < N; id++) {
    ASSERT_OK(Put(Key(id), RandomString(&rnd, 1024)));
  }

  uint64_t size = approx_size(50, 60, true);
  ASSERT_GT(size, 6000);
  ASSERT_LT(size, 204800);
  // Zero if not including mem table
  size = approx_size(50, 60, false);
  ASSERT_EQ(size, 0);

  // Nothing has been written in this range yet.
  size = approx_size(500, 600, true);
  ASSERT_EQ(size, 0);

  for (int id = 0; id < N; id++) {
    ASSERT_OK(Put(Key(1000 + id), RandomString(&rnd, 1024)));
  }

  // Still empty: the new keys start at Key(1000).
  size = approx_size(500, 600, true);
  ASSERT_EQ(size, 0);

  size = approx_size(100, 1020, true);
  ASSERT_GT(size, 6000);

  options.max_write_buffer_number = 8;
  options.min_write_buffer_number_to_merge = 5;
  options.write_buffer_size = 1024 * N;  // Not very large
  DestroyAndReopen(options);

  // Three consecutive keys out of every five, inserted in shuffled order.
  int keys[N * 3];
  for (int group = 0; group < N; group++) {
    for (int offset = 0; offset < 3; offset++) {
      keys[group * 3 + offset] = group * 5 + offset;
    }
  }
  std::random_shuffle(std::begin(keys), std::end(keys));

  for (int idx = 0; idx < N * 3; idx++) {
    ASSERT_OK(Put(Key(keys[idx] + 1000), RandomString(&rnd, 1024)));
  }

  size = approx_size(100, 300, true);
  ASSERT_EQ(size, 0);

  size = approx_size(1050, 1080, true);
  ASSERT_GT(size, 6000);

  size = approx_size(2100, 2300, true);
  ASSERT_EQ(size, 0);

  uint64_t size_with_mt = approx_size(1050, 1080, true);
  ASSERT_GT(size_with_mt, 6000);
  uint64_t size_without_mt = approx_size(1050, 1080, false);
  ASSERT_EQ(size_without_mt, 0);

  Flush();

  for (int id = 0; id < N; id++) {
    ASSERT_OK(Put(Key(id + 1000), RandomString(&rnd, 1024)));
  }

  // After the flush the range is covered by both files and the mem table.
  size_with_mt = approx_size(1050, 1080, true);
  size_without_mt = approx_size(1050, 1080, false);
  ASSERT_GT(size_with_mt, size_without_mt);
  ASSERT_GT(size_without_mt, 6000);
}
|
||||
|
||||
TEST_F(DBTest, ApproximateSizes) {
|
||||
do {
|
||||
Options options;
|
||||
@ -8948,8 +9054,8 @@ class ModelDB: public DB {
|
||||
}
|
||||
using DB::GetApproximateSizes;
|
||||
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
|
||||
const Range* range, int n,
|
||||
uint64_t* sizes) override {
|
||||
const Range* range, int n, uint64_t* sizes,
|
||||
bool include_memtable) override {
|
||||
for (int i = 0; i < n; i++) {
|
||||
sizes[i] = 0;
|
||||
}
|
||||
|
@ -64,6 +64,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
|
||||
table_(ioptions.memtable_factory->CreateMemTableRep(
|
||||
comparator_, &allocator_, ioptions.prefix_extractor,
|
||||
ioptions.info_log)),
|
||||
data_size_(0),
|
||||
num_entries_(0),
|
||||
num_deletes_(0),
|
||||
flush_in_progress_(false),
|
||||
@ -290,6 +291,26 @@ port::RWMutex* MemTable::GetLock(const Slice& key) {
|
||||
return &locks_[hash(key) % locks_.size()];
|
||||
}
|
||||
|
||||
uint64_t MemTable::ApproximateSize(const Slice& start_ikey,
|
||||
const Slice& end_ikey) {
|
||||
uint64_t entry_count = table_->ApproximateNumEntries(start_ikey, end_ikey);
|
||||
if (entry_count == 0) {
|
||||
return 0;
|
||||
}
|
||||
uint64_t n = num_entries_.load(std::memory_order_relaxed);
|
||||
if (n == 0) {
|
||||
return 0;
|
||||
}
|
||||
if (entry_count > n) {
|
||||
// table_->ApproximateNumEntries() is just an estimate so it can be larger
|
||||
// than actual entries we have. Cap it to entries we have to limit the
|
||||
// inaccuracy.
|
||||
entry_count = n;
|
||||
}
|
||||
uint64_t data_size = data_size_.load(std::memory_order_relaxed);
|
||||
return entry_count * (data_size / n);
|
||||
}
|
||||
|
||||
void MemTable::Add(SequenceNumber s, ValueType type,
|
||||
const Slice& key, /* user key */
|
||||
const Slice& value) {
|
||||
@ -317,7 +338,10 @@ void MemTable::Add(SequenceNumber s, ValueType type,
|
||||
memcpy(p, value.data(), val_size);
|
||||
assert((unsigned)(p + val_size - buf) == (unsigned)encoded_len);
|
||||
table_->Insert(handle);
|
||||
num_entries_++;
|
||||
num_entries_.store(num_entries_.load(std::memory_order_relaxed) + 1,
|
||||
std::memory_order_relaxed);
|
||||
data_size_.store(data_size_.load(std::memory_order_relaxed) + encoded_len,
|
||||
std::memory_order_relaxed);
|
||||
if (type == kTypeDeletion) {
|
||||
num_deletes_++;
|
||||
}
|
||||
|
@ -212,7 +212,9 @@ class MemTable {
|
||||
// Get total number of entries in the mem table.
|
||||
// REQUIRES: external synchronization to prevent simultaneous
|
||||
// operations on the same MemTable (unless this Memtable is immutable).
|
||||
uint64_t num_entries() const { return num_entries_; }
|
||||
uint64_t num_entries() const {
|
||||
return num_entries_.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Get total number of deletes in the mem table.
|
||||
// REQUIRES: external synchronization to prevent simultaneous
|
||||
@ -275,6 +277,8 @@ class MemTable {
|
||||
return table_->IsSnapshotSupported() && !moptions_.inplace_update_support;
|
||||
}
|
||||
|
||||
uint64_t ApproximateSize(const Slice& start_ikey, const Slice& end_ikey);
|
||||
|
||||
// Get the lock associated for the key
|
||||
port::RWMutex* GetLock(const Slice& key);
|
||||
|
||||
@ -300,7 +304,9 @@ class MemTable {
|
||||
MemTableAllocator allocator_;
|
||||
unique_ptr<MemTableRep> table_;
|
||||
|
||||
uint64_t num_entries_;
|
||||
// Total data size of all data inserted
|
||||
std::atomic<uint64_t> data_size_;
|
||||
std::atomic<uint64_t> num_entries_;
|
||||
uint64_t num_deletes_;
|
||||
|
||||
// These are used to manage memtable flushes to storage
|
||||
|
@ -149,6 +149,15 @@ uint64_t MemTableListVersion::GetTotalNumEntries() const {
|
||||
return total_num;
|
||||
}
|
||||
|
||||
uint64_t MemTableListVersion::ApproximateSize(const Slice& start_ikey,
|
||||
const Slice& end_ikey) {
|
||||
uint64_t total_size = 0;
|
||||
for (auto& m : memlist_) {
|
||||
total_size += m->ApproximateSize(start_ikey, end_ikey);
|
||||
}
|
||||
return total_size;
|
||||
}
|
||||
|
||||
uint64_t MemTableListVersion::GetTotalNumDeletes() const {
|
||||
uint64_t total_num = 0;
|
||||
for (auto& m : memlist_) {
|
||||
|
@ -84,6 +84,8 @@ class MemTableListVersion {
|
||||
|
||||
uint64_t GetTotalNumDeletes() const;
|
||||
|
||||
uint64_t ApproximateSize(const Slice& start_ikey, const Slice& end_ikey);
|
||||
|
||||
// Returns the value of MemTable::GetEarliestSequenceNumber() on the most
|
||||
// recent MemTable in this list or kMaxSequenceNumber if the list is empty.
|
||||
// If include_history=true, will also search Memtables in MemTableList
|
||||
|
@ -59,6 +59,9 @@ class SkipList {
|
||||
// Returns true iff an entry that compares equal to key is in the list.
|
||||
bool Contains(const Key& key) const;
|
||||
|
||||
// Return estimated number of entries smaller than `key`.
|
||||
uint64_t EstimateCount(const Key& key) const;
|
||||
|
||||
// Iteration over the contents of a skip list
|
||||
class Iterator {
|
||||
public:
|
||||
@ -354,10 +357,34 @@ typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::FindLast()
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Key, class Comparator>
|
||||
template <typename Key, class Comparator>
|
||||
uint64_t SkipList<Key, Comparator>::EstimateCount(const Key& key) const {
|
||||
uint64_t count = 0;
|
||||
|
||||
Node* x = head_;
|
||||
int level = GetMaxHeight() - 1;
|
||||
while (true) {
|
||||
assert(x == head_ || compare_(x->key, key) < 0);
|
||||
Node* next = x->Next(level);
|
||||
if (next == nullptr || compare_(next->key, key) >= 0) {
|
||||
if (level == 0) {
|
||||
return count;
|
||||
} else {
|
||||
// Switch to next list
|
||||
count *= kBranching_;
|
||||
level--;
|
||||
}
|
||||
} else {
|
||||
x = next;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Key, class Comparator>
|
||||
SkipList<Key, Comparator>::SkipList(const Comparator cmp, Allocator* allocator,
|
||||
int32_t max_height,
|
||||
int32_t branching_factor)
|
||||
int32_t max_height,
|
||||
int32_t branching_factor)
|
||||
: kMaxHeight_(max_height),
|
||||
kBranching_(branching_factor),
|
||||
compare_(cmp),
|
||||
|
@ -397,10 +397,12 @@ class DB {
|
||||
//
|
||||
// The results may not include the sizes of recently written data.
|
||||
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
|
||||
const Range* range, int n,
|
||||
uint64_t* sizes) = 0;
|
||||
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes) {
|
||||
GetApproximateSizes(DefaultColumnFamily(), range, n, sizes);
|
||||
const Range* range, int n, uint64_t* sizes,
|
||||
bool include_memtable = false) = 0;
|
||||
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes,
|
||||
bool include_memtable = false) {
|
||||
GetApproximateSizes(DefaultColumnFamily(), range, n, sizes,
|
||||
include_memtable);
|
||||
}
|
||||
|
||||
// Compact the underlying storage for the key range [*begin,*end].
|
||||
|
@ -103,6 +103,11 @@ class MemTableRep {
|
||||
virtual void Get(const LookupKey& k, void* callback_args,
|
||||
bool (*callback_func)(void* arg, const char* entry));
|
||||
|
||||
// Returns an estimate of the number of entries whose keys lie between
// start_ikey and end_ikey. This default implementation always returns 0;
// representations that are able to produce an estimate override it.
virtual uint64_t ApproximateNumEntries(const Slice& start_ikey,
                                       const Slice& end_ikey) {
  return 0;
}
|
||||
|
||||
// Report an approximation of how much memory has been used other than memory
|
||||
// that was allocated through the allocator.
|
||||
virtual size_t ApproximateMemoryUsage() = 0;
|
||||
|
@ -121,8 +121,8 @@ class StackableDB : public DB {
|
||||
|
||||
using DB::GetApproximateSizes;
|
||||
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
|
||||
const Range* r, int n,
|
||||
uint64_t* sizes) override {
|
||||
const Range* r, int n, uint64_t* sizes,
|
||||
bool include_memtable = false) override {
|
||||
return db_->GetApproximateSizes(column_family, r, n, sizes);
|
||||
}
|
||||
|
||||
|
@ -52,6 +52,15 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t ApproximateNumEntries(const Slice& start_ikey,
|
||||
const Slice& end_ikey) override {
|
||||
std::string tmp;
|
||||
uint64_t start_count =
|
||||
skip_list_.EstimateCount(EncodeKey(&tmp, start_ikey));
|
||||
uint64_t end_count = skip_list_.EstimateCount(EncodeKey(&tmp, end_ikey));
|
||||
return (end_count >= start_count) ? (end_count - start_count) : 0;
|
||||
}
|
||||
|
||||
virtual ~SkipListRep() override { }
|
||||
|
||||
// Iteration over the contents of a skip list
|
||||
|
Loading…
x
Reference in New Issue
Block a user