2013-10-16 23:59:46 +02:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
2013-12-11 04:03:13 +01:00
|
|
|
#include "db/memtable_list.h"
|
2012-11-29 01:42:36 +01:00
|
|
|
|
2012-10-19 23:00:53 +02:00
|
|
|
#include <string>
|
2013-08-23 17:38:13 +02:00
|
|
|
#include "rocksdb/db.h"
|
2012-10-19 23:00:53 +02:00
|
|
|
#include "db/memtable.h"
|
2014-01-24 23:30:28 +01:00
|
|
|
#include "db/version_set.h"
|
2013-08-23 17:38:13 +02:00
|
|
|
#include "rocksdb/env.h"
|
|
|
|
#include "rocksdb/iterator.h"
|
2012-10-19 23:00:53 +02:00
|
|
|
#include "util/coding.h"
|
2014-04-03 06:49:51 +02:00
|
|
|
#include "util/log_buffer.h"
|
2012-10-19 23:00:53 +02:00
|
|
|
|
2013-10-04 06:49:15 +02:00
|
|
|
namespace rocksdb {
|
2012-10-19 23:00:53 +02:00
|
|
|
|
|
|
|
class InternalKeyComparator;
|
|
|
|
class Mutex;
|
|
|
|
class VersionSet;
|
|
|
|
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
MemTableListVersion::MemTableListVersion(MemTableListVersion* old) {
|
|
|
|
if (old != nullptr) {
|
|
|
|
memlist_ = old->memlist_;
|
|
|
|
size_ = old->size_;
|
|
|
|
for (auto& m : memlist_) {
|
|
|
|
m->Ref();
|
|
|
|
}
|
2012-10-19 23:00:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
void MemTableListVersion::Ref() { ++refs_; }
|
|
|
|
|
2014-01-28 19:35:48 +01:00
|
|
|
void MemTableListVersion::Unref(autovector<MemTable*>* to_delete) {
|
2014-01-25 22:50:30 +01:00
|
|
|
assert(refs_ >= 1);
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
--refs_;
|
|
|
|
if (refs_ == 0) {
|
|
|
|
// if to_delete is equal to nullptr it means we're confident
|
|
|
|
// that refs_ will not be zero
|
|
|
|
assert(to_delete != nullptr);
|
|
|
|
for (const auto& m : memlist_) {
|
|
|
|
MemTable* x = m->Unref();
|
|
|
|
if (x != nullptr) {
|
|
|
|
to_delete->push_back(x);
|
|
|
|
}
|
2013-11-25 20:55:36 +01:00
|
|
|
}
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
delete this;
|
2012-10-19 23:00:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
int MemTableListVersion::size() const { return size_; }
|
|
|
|
|
2012-10-19 23:00:53 +02:00
|
|
|
// Returns the total number of memtables in the list
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
int MemTableList::size() const {
|
|
|
|
assert(num_flush_not_started_ <= current_->size_);
|
|
|
|
return current_->size_;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search all the memtables starting from the most recent one.
|
|
|
|
// Return the most recent value found, if any.
|
|
|
|
// Operands stores the list of merge operations to apply, so far.
|
|
|
|
bool MemTableListVersion::Get(const LookupKey& key, std::string* value,
|
|
|
|
Status* s, MergeContext& merge_context,
|
|
|
|
const Options& options) {
|
|
|
|
for (auto& memtable : memlist_) {
|
|
|
|
if (memtable->Get(key, value, s, merge_context, options)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
void MemTableListVersion::AddIterators(const ReadOptions& options,
|
|
|
|
std::vector<Iterator*>* iterator_list) {
|
|
|
|
for (auto& m : memlist_) {
|
|
|
|
iterator_list->push_back(m->NewIterator(options));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-04-23 02:17:33 +02:00
|
|
|
uint64_t MemTableListVersion::GetTotalNumEntries() const {
|
|
|
|
uint64_t total_num = 0;
|
|
|
|
for (auto& m : memlist_) {
|
|
|
|
total_num += m->GetNumEntries();
|
|
|
|
}
|
|
|
|
return total_num;
|
|
|
|
}
|
|
|
|
|
2014-01-27 02:40:43 +01:00
|
|
|
// caller is responsible for referencing m
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
void MemTableListVersion::Add(MemTable* m) {
|
|
|
|
assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable
|
|
|
|
memlist_.push_front(m);
|
|
|
|
++size_;
|
|
|
|
}
|
|
|
|
|
2014-01-27 02:40:43 +01:00
|
|
|
// caller is responsible for unreferencing m
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
void MemTableListVersion::Remove(MemTable* m) {
|
|
|
|
assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable
|
|
|
|
memlist_.remove(m);
|
|
|
|
--size_;
|
2012-10-19 23:00:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns true if there is at least one memtable on which flush has
|
|
|
|
// not yet started.
|
2014-03-18 20:37:42 +01:00
|
|
|
bool MemTableList::IsFlushPending() const {
|
2013-09-05 02:24:35 +02:00
|
|
|
if ((flush_requested_ && num_flush_not_started_ >= 1) ||
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
(num_flush_not_started_ >= min_write_buffer_number_to_merge_)) {
|
2013-03-01 03:04:58 +01:00
|
|
|
assert(imm_flush_needed.NoBarrier_Load() != nullptr);
|
2012-10-19 23:00:53 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-06-11 23:23:58 +02:00
|
|
|
// Returns the memtables that need to be flushed.
|
2014-01-02 20:26:57 +01:00
|
|
|
void MemTableList::PickMemtablesToFlush(autovector<MemTable*>* ret) {
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
const auto& memlist = current_->memlist_;
|
|
|
|
for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) {
|
2012-10-19 23:00:53 +02:00
|
|
|
MemTable* m = *it;
|
|
|
|
if (!m->flush_in_progress_) {
|
|
|
|
assert(!m->flush_completed_);
|
2012-11-29 01:42:36 +01:00
|
|
|
num_flush_not_started_--;
|
2012-10-19 23:00:53 +02:00
|
|
|
if (num_flush_not_started_ == 0) {
|
2013-03-01 03:04:58 +01:00
|
|
|
imm_flush_needed.Release_Store(nullptr);
|
2012-10-19 23:00:53 +02:00
|
|
|
}
|
2014-01-28 19:35:48 +01:00
|
|
|
m->flush_in_progress_ = true; // flushing will start very soon
|
2013-06-11 23:23:58 +02:00
|
|
|
ret->push_back(m);
|
2012-10-19 23:00:53 +02:00
|
|
|
}
|
|
|
|
}
|
2014-01-28 19:35:48 +01:00
|
|
|
flush_requested_ = false; // start-flush request is complete
|
2012-10-19 23:00:53 +02:00
|
|
|
}
|
|
|
|
|
2014-02-12 20:42:54 +01:00
|
|
|
void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
|
2014-02-12 23:01:30 +01:00
|
|
|
uint64_t file_number,
|
|
|
|
std::set<uint64_t>* pending_outputs) {
|
2014-02-12 20:42:54 +01:00
|
|
|
assert(!mems.empty());
|
|
|
|
|
|
|
|
// If the flush was not successful, then just reset state.
|
|
|
|
// Maybe a suceeding attempt to flush will be successful.
|
|
|
|
for (MemTable* m : mems) {
|
|
|
|
assert(m->flush_in_progress_);
|
|
|
|
assert(m->file_number_ == 0);
|
|
|
|
|
|
|
|
m->flush_in_progress_ = false;
|
|
|
|
m->flush_completed_ = false;
|
|
|
|
m->edit_.Clear();
|
|
|
|
num_flush_not_started_++;
|
|
|
|
}
|
|
|
|
pending_outputs->erase(file_number);
|
|
|
|
imm_flush_needed.Release_Store(reinterpret_cast<void *>(1));
|
|
|
|
}
|
|
|
|
|
2012-10-19 23:00:53 +02:00
|
|
|
// Record a successful flush in the manifest file
|
2013-06-11 23:23:58 +02:00
|
|
|
Status MemTableList::InstallMemtableFlushResults(
|
2014-02-07 00:42:16 +01:00
|
|
|
ColumnFamilyData* cfd, const autovector<MemTable*>& mems, VersionSet* vset,
|
2014-01-27 20:02:21 +01:00
|
|
|
port::Mutex* mu, Logger* info_log, uint64_t file_number,
|
2014-01-28 19:35:48 +01:00
|
|
|
std::set<uint64_t>& pending_outputs, autovector<MemTable*>* to_delete,
|
2014-04-07 20:29:48 +02:00
|
|
|
Directory* db_directory, LogBuffer* log_buffer) {
|
2012-10-19 23:00:53 +02:00
|
|
|
mu->AssertHeld();
|
|
|
|
|
|
|
|
// flush was sucessful
|
2013-10-28 06:55:46 +01:00
|
|
|
for (size_t i = 0; i < mems.size(); ++i) {
|
2013-06-11 23:23:58 +02:00
|
|
|
// All the edits are associated with the first memtable of this batch.
|
2013-10-28 07:08:54 +01:00
|
|
|
assert(i == 0 || mems[i]->GetEdits()->NumEntries() == 0);
|
2013-06-11 23:23:58 +02:00
|
|
|
|
2013-10-28 06:55:46 +01:00
|
|
|
mems[i]->flush_completed_ = true;
|
|
|
|
mems[i]->file_number_ = file_number;
|
2013-06-11 23:23:58 +02:00
|
|
|
}
|
2012-10-19 23:00:53 +02:00
|
|
|
|
|
|
|
// if some other thread is already commiting, then return
|
|
|
|
Status s;
|
|
|
|
if (commit_in_progress_) {
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Only a single thread can be executing this piece of code
|
|
|
|
commit_in_progress_ = true;
|
|
|
|
|
|
|
|
// scan all memtables from the earliest, and commit those
|
2013-06-11 23:23:58 +02:00
|
|
|
// (in that order) that have finished flushing. Memetables
|
|
|
|
// are always committed in the order that they were created.
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
while (!current_->memlist_.empty() && s.ok()) {
|
|
|
|
MemTable* m = current_->memlist_.back(); // get the last element
|
2012-10-19 23:00:53 +02:00
|
|
|
if (!m->flush_completed_) {
|
|
|
|
break;
|
|
|
|
}
|
2013-06-11 23:23:58 +02:00
|
|
|
|
2014-04-25 15:51:16 +02:00
|
|
|
LogToBuffer(log_buffer, "[%s] Level-0 commit table #%lu started",
|
|
|
|
cfd->GetName().c_str(), (unsigned long)m->file_number_);
|
2012-10-19 23:00:53 +02:00
|
|
|
|
2012-11-29 01:42:36 +01:00
|
|
|
// this can release and reacquire the mutex.
|
2014-01-27 22:55:47 +01:00
|
|
|
s = vset->LogAndApply(cfd, &m->edit_, mu, db_directory);
|
2012-10-19 23:00:53 +02:00
|
|
|
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
// we will be changing the version in the next code path,
|
|
|
|
// so we better create a new one, since versions are immutable
|
|
|
|
InstallNewVersion();
|
|
|
|
|
2013-06-11 23:23:58 +02:00
|
|
|
// All the later memtables that have the same filenum
|
|
|
|
// are part of the same batch. They can be committed now.
|
2013-10-25 00:58:00 +02:00
|
|
|
uint64_t mem_id = 1; // how many memtables has been flushed.
|
2013-06-11 23:23:58 +02:00
|
|
|
do {
|
|
|
|
if (s.ok()) { // commit new state
|
2014-04-25 15:51:16 +02:00
|
|
|
LogToBuffer(log_buffer,
|
|
|
|
"[%s] Level-0 commit table #%lu: memtable #%lu done",
|
|
|
|
cfd->GetName().c_str(), (unsigned long)m->file_number_,
|
|
|
|
(unsigned long)mem_id);
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
current_->Remove(m);
|
2013-06-11 23:23:58 +02:00
|
|
|
assert(m->file_number_ > 0);
|
|
|
|
|
|
|
|
// pending_outputs can be cleared only after the newly created file
|
|
|
|
// has been written to a committed version so that other concurrently
|
|
|
|
// executing compaction threads do not mistakenly assume that this
|
|
|
|
// file is not live.
|
|
|
|
pending_outputs.erase(m->file_number_);
|
2013-11-27 23:56:20 +01:00
|
|
|
if (m->Unref() != nullptr) {
|
|
|
|
to_delete->push_back(m);
|
|
|
|
}
|
2013-06-11 23:23:58 +02:00
|
|
|
} else {
|
|
|
|
//commit failed. setup state so that we can flush again.
|
2013-10-25 00:58:00 +02:00
|
|
|
Log(info_log,
|
2013-11-13 06:02:03 +01:00
|
|
|
"Level-0 commit table #%lu: memtable #%lu failed",
|
|
|
|
(unsigned long)m->file_number_,
|
|
|
|
(unsigned long)mem_id);
|
2013-06-11 23:23:58 +02:00
|
|
|
m->flush_completed_ = false;
|
|
|
|
m->flush_in_progress_ = false;
|
|
|
|
m->edit_.Clear();
|
|
|
|
num_flush_not_started_++;
|
|
|
|
pending_outputs.erase(m->file_number_);
|
|
|
|
m->file_number_ = 0;
|
|
|
|
imm_flush_needed.Release_Store((void *)1);
|
|
|
|
}
|
2013-10-25 00:58:00 +02:00
|
|
|
++mem_id;
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
} while (!current_->memlist_.empty() && (m = current_->memlist_.back()) &&
|
2013-06-11 23:23:58 +02:00
|
|
|
m->file_number_ == file_number);
|
2012-10-19 23:00:53 +02:00
|
|
|
}
|
|
|
|
commit_in_progress_ = false;
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2012-11-29 01:42:36 +01:00
|
|
|
// New memtables are inserted at the front of the list.
|
2012-10-19 23:00:53 +02:00
|
|
|
void MemTableList::Add(MemTable* m) {
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
assert(current_->size_ >= num_flush_not_started_);
|
|
|
|
InstallNewVersion();
|
2014-01-27 02:40:43 +01:00
|
|
|
// this method is used to move mutable memtable into an immutable list.
|
|
|
|
// since mutable memtable is already refcounted by the DBImpl,
|
|
|
|
// and when moving to the imutable list we don't unref it,
|
|
|
|
// we don't have to ref the memtable here. we just take over the
|
|
|
|
// reference from the DBImpl.
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
current_->Add(m);
|
2013-08-23 08:10:02 +02:00
|
|
|
m->MarkImmutable();
|
2012-10-19 23:00:53 +02:00
|
|
|
num_flush_not_started_++;
|
|
|
|
if (num_flush_not_started_ == 1) {
|
|
|
|
imm_flush_needed.Release_Store((void *)1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns an estimate of the number of bytes of data in use.
|
|
|
|
size_t MemTableList::ApproximateMemoryUsage() {
|
|
|
|
size_t size = 0;
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
for (auto& memtable : current_->memlist_) {
|
2013-08-09 08:07:36 +02:00
|
|
|
size += memtable->ApproximateMemoryUsage();
|
2012-10-19 23:00:53 +02:00
|
|
|
}
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
void MemTableList::InstallNewVersion() {
|
|
|
|
if (current_->refs_ == 1) {
|
|
|
|
// we're the only one using the version, just keep using it
|
|
|
|
} else {
|
|
|
|
// somebody else holds the current version, we need to create new one
|
|
|
|
MemTableListVersion* version = current_;
|
|
|
|
current_ = new MemTableListVersion(current_);
|
2014-01-25 22:50:30 +01:00
|
|
|
current_->Ref();
|
MemTableListVersion
Summary:
MemTableListVersion is to MemTableList what Version is to VersionSet. I took almost the same ideas to develop MemTableListVersion. The reason is to have copying std::list done in background, while flushing, rather than in foreground (MultiGet() and NewIterator()) under a mutex! Also, whenever we copied MemTableList, we copied also some MemTableList metadata (flush_requested_, commit_in_progress_, etc.), which was wasteful.
This diff avoids std::list copy under a mutex in both MultiGet() and NewIterator(). I created a small database with some number of immutable memtables, and creating 100.000 iterators in a single-thread (!) decreased from {188739, 215703, 198028} to {154352, 164035, 159817}. A lot of the savings come from code under a mutex, so we should see much higher savings with multiple threads. Creating new iterator is very important to LogDevice team.
I also think this diff will make SuperVersion obsolete for performance reasons. I will try it in the next diff. SuperVersion gave us huge savings on Get() code path, but I think that most of the savings came from copying MemTableList under a mutex. If we had MemTableListVersion, we would never need to copy the entire object (like we still do in NewIterator() and MultiGet())
Test Plan: `make check` works. I will also do `make valgrind_check` before commit
Reviewers: dhruba, haobo, kailiu, sdong, emayanke, tnovak
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15255
2014-01-24 23:52:08 +01:00
|
|
|
version->Unref();
|
2012-10-19 23:00:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-10-04 06:49:15 +02:00
|
|
|
} // namespace rocksdb
|