2014-01-22 20:44:53 +01:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2014-01-25 03:40:05 +01:00
|
|
|
#include <unordered_map>
|
2014-01-22 20:44:53 +01:00
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
2014-01-24 23:30:28 +01:00
|
|
|
#include "rocksdb/options.h"
|
|
|
|
#include "db/memtablelist.h"
|
2014-01-28 20:05:04 +01:00
|
|
|
#include "db/write_batch_internal.h"
|
2014-01-24 23:30:28 +01:00
|
|
|
|
2014-01-22 20:44:53 +01:00
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
class Version;
|
|
|
|
class VersionSet;
|
2014-01-24 23:30:28 +01:00
|
|
|
class MemTable;
|
|
|
|
class MemTableListVersion;
|
2014-02-01 00:30:27 +01:00
|
|
|
class CompactionPicker;
|
|
|
|
class Compaction;
|
|
|
|
class InternalKey;
|
2014-01-24 23:30:28 +01:00
|
|
|
|
|
|
|
// holds references to memtable, all immutable memtables and version
|
|
|
|
struct SuperVersion {
|
|
|
|
MemTable* mem;
|
|
|
|
MemTableListVersion* imm;
|
|
|
|
Version* current;
|
|
|
|
std::atomic<uint32_t> refs;
|
|
|
|
// We need to_delete because during Cleanup(), imm->Unref() returns
|
|
|
|
// all memtables that we need to free through this vector. We then
|
|
|
|
// delete all those memtables outside of mutex, during destruction
|
|
|
|
std::vector<MemTable*> to_delete;
|
|
|
|
|
|
|
|
// should be called outside the mutex
|
2014-01-31 00:23:13 +01:00
|
|
|
SuperVersion();
|
2014-01-24 23:30:28 +01:00
|
|
|
~SuperVersion();
|
|
|
|
SuperVersion* Ref();
|
|
|
|
// Returns true if this was the last reference and caller should
|
|
|
|
// call Clenaup() and delete the object
|
|
|
|
bool Unref();
|
|
|
|
|
|
|
|
// call these two methods with db mutex held
|
|
|
|
// Cleanup unrefs mem, imm and current. Also, it stores all memtables
|
|
|
|
// that needs to be deleted in to_delete vector. Unrefing those
|
|
|
|
// objects needs to be done in the mutex
|
|
|
|
void Cleanup();
|
|
|
|
void Init(MemTable* new_mem, MemTableListVersion* new_imm,
|
|
|
|
Version* new_current);
|
|
|
|
};
|
2014-01-22 20:44:53 +01:00
|
|
|
|
2014-01-29 22:28:50 +01:00
|
|
|
// column family metadata. not thread-safe. should be protected by db_mutex
|
|
|
|
class ColumnFamilyData {
|
|
|
|
public:
|
2014-01-22 20:44:53 +01:00
|
|
|
ColumnFamilyData(uint32_t id, const std::string& name,
|
|
|
|
Version* dummy_versions, const ColumnFamilyOptions& options);
|
|
|
|
~ColumnFamilyData();
|
2014-01-24 23:30:28 +01:00
|
|
|
|
2014-01-29 22:28:50 +01:00
|
|
|
uint32_t GetID() const { return id_; }
|
|
|
|
const std::string& GetName() { return name_; }
|
|
|
|
|
2014-02-01 00:30:27 +01:00
|
|
|
int NumberLevels() const { return options_.num_levels; }
|
|
|
|
|
2014-01-29 22:28:50 +01:00
|
|
|
void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
|
|
|
|
uint64_t GetLogNumber() const { return log_number_; }
|
|
|
|
|
|
|
|
ColumnFamilyOptions* options() { return &options_; }
|
|
|
|
|
|
|
|
MemTableList* imm() { return &imm_; }
|
|
|
|
MemTable* mem() { return mem_; }
|
|
|
|
Version* current() { return current_; }
|
|
|
|
Version* dummy_versions() { return dummy_versions_; }
|
|
|
|
void SetMemtable(MemTable* new_mem) { mem_ = new_mem; }
|
2014-01-31 00:23:13 +01:00
|
|
|
void SetCurrent(Version* current);
|
2014-01-24 23:30:28 +01:00
|
|
|
void CreateNewMemtable();
|
2014-01-29 22:28:50 +01:00
|
|
|
|
2014-02-01 00:30:27 +01:00
|
|
|
// See documentation in compaction_picker.h
|
|
|
|
Compaction* PickCompaction();
|
|
|
|
Compaction* CompactRange(int input_level, int output_level,
|
|
|
|
const InternalKey* begin, const InternalKey* end,
|
|
|
|
InternalKey** compaction_end);
|
|
|
|
|
|
|
|
CompactionPicker* compaction_picker() const {
|
|
|
|
return compaction_picker_.get();
|
|
|
|
}
|
|
|
|
const InternalKeyComparator& internal_comparator() const { return icmp_; }
|
|
|
|
|
2014-01-29 22:28:50 +01:00
|
|
|
SuperVersion* GetSuperVersion() const { return super_version_; }
|
|
|
|
uint64_t GetSuperVersionNumber() const {
|
|
|
|
return super_version_number_.load();
|
|
|
|
}
|
|
|
|
// will return a pointer to SuperVersion* if previous SuperVersion
|
|
|
|
// if its reference count is zero and needs deletion or nullptr if not
|
|
|
|
// As argument takes a pointer to allocated SuperVersion to enable
|
|
|
|
// the clients to allocate SuperVersion outside of mutex.
|
|
|
|
SuperVersion* InstallSuperVersion(SuperVersion* new_superversion);
|
|
|
|
|
2014-01-31 00:23:13 +01:00
|
|
|
// A Flag indicating whether write needs to slowdown because of there are
|
|
|
|
// too many number of level0 files.
|
|
|
|
bool NeedSlowdownForNumLevel0Files() const {
|
|
|
|
return need_slowdown_for_num_level0_files_;
|
|
|
|
}
|
|
|
|
|
2014-01-29 22:28:50 +01:00
|
|
|
private:
|
2014-01-31 01:49:46 +01:00
|
|
|
friend class ColumnFamilySet;
|
|
|
|
|
|
|
|
ColumnFamilyData* next() { return next_.load(); }
|
|
|
|
|
2014-01-29 22:28:50 +01:00
|
|
|
uint32_t id_;
|
|
|
|
const std::string name_;
|
|
|
|
Version* dummy_versions_; // Head of circular doubly-linked list of versions.
|
|
|
|
Version* current_; // == dummy_versions->prev_
|
|
|
|
ColumnFamilyOptions options_;
|
|
|
|
|
2014-02-01 00:30:27 +01:00
|
|
|
const InternalKeyComparator icmp_;
|
|
|
|
|
2014-01-29 22:28:50 +01:00
|
|
|
MemTable* mem_;
|
|
|
|
MemTableList imm_;
|
|
|
|
SuperVersion* super_version_;
|
|
|
|
|
|
|
|
// An ordinal representing the current SuperVersion. Updated by
|
|
|
|
// InstallSuperVersion(), i.e. incremented every time super_version_
|
|
|
|
// changes.
|
|
|
|
std::atomic<uint64_t> super_version_number_;
|
|
|
|
|
2014-01-31 01:49:46 +01:00
|
|
|
// pointers for a circular linked list. we use it to support iterations
|
|
|
|
// that can be concurrent with writes
|
|
|
|
std::atomic<ColumnFamilyData*> next_;
|
|
|
|
std::atomic<ColumnFamilyData*> prev_;
|
|
|
|
|
2014-01-29 22:28:50 +01:00
|
|
|
// This is the earliest log file number that contains data from this
|
|
|
|
// Column Family. All earlier log files must be ignored and not
|
|
|
|
// recovered from
|
|
|
|
uint64_t log_number_;
|
2014-01-31 00:23:13 +01:00
|
|
|
|
|
|
|
// A flag indicating whether we should delay writes because
|
|
|
|
// we have too many level 0 files
|
|
|
|
bool need_slowdown_for_num_level0_files_;
|
2014-02-01 00:30:27 +01:00
|
|
|
|
|
|
|
// An object that keeps all the compaction stats
|
|
|
|
// and picks the next compaction
|
|
|
|
std::unique_ptr<CompactionPicker> compaction_picker_;
|
2014-01-22 20:44:53 +01:00
|
|
|
};
|
|
|
|
|
2014-01-29 22:28:50 +01:00
|
|
|
// Thread safe only for reading without a writer. All access should be
|
|
|
|
// locked when adding or dropping column family
|
2014-01-22 20:44:53 +01:00
|
|
|
class ColumnFamilySet {
|
|
|
|
public:
|
2014-01-24 23:30:28 +01:00
|
|
|
class iterator {
|
|
|
|
public:
|
2014-01-31 01:49:46 +01:00
|
|
|
explicit iterator(ColumnFamilyData* cfd)
|
|
|
|
: current_(cfd) {}
|
2014-01-24 23:30:28 +01:00
|
|
|
iterator& operator++() {
|
2014-01-31 01:49:46 +01:00
|
|
|
current_ = current_->next();
|
2014-01-24 23:30:28 +01:00
|
|
|
return *this;
|
|
|
|
}
|
2014-01-31 01:49:46 +01:00
|
|
|
bool operator!=(const iterator& other) {
|
|
|
|
return this->current_ != other.current_;
|
|
|
|
}
|
|
|
|
ColumnFamilyData* operator*() { return current_; }
|
2014-01-24 23:30:28 +01:00
|
|
|
|
|
|
|
private:
|
2014-01-31 01:49:46 +01:00
|
|
|
ColumnFamilyData* current_;
|
2014-01-24 23:30:28 +01:00
|
|
|
};
|
2014-01-22 20:44:53 +01:00
|
|
|
|
|
|
|
ColumnFamilySet();
|
|
|
|
~ColumnFamilySet();
|
|
|
|
|
|
|
|
ColumnFamilyData* GetDefault() const;
|
|
|
|
// GetColumnFamily() calls return nullptr if column family is not found
|
|
|
|
ColumnFamilyData* GetColumnFamily(uint32_t id) const;
|
|
|
|
bool Exists(uint32_t id);
|
|
|
|
bool Exists(const std::string& name);
|
|
|
|
uint32_t GetID(const std::string& name);
|
|
|
|
// this call will return the next available column family ID. it guarantees
|
|
|
|
// that there is no column family with id greater than or equal to the
|
|
|
|
// returned value in the current running instance. It does not, however,
|
|
|
|
// guarantee that the returned ID is unique accross RocksDB restarts.
|
|
|
|
// For example, if a client adds a column family 6 and then drops it,
|
|
|
|
// after a restart, we might reuse column family 6 ID.
|
|
|
|
uint32_t GetNextColumnFamilyID();
|
|
|
|
|
|
|
|
ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id,
|
|
|
|
Version* dummy_version,
|
|
|
|
const ColumnFamilyOptions& options);
|
|
|
|
void DropColumnFamily(uint32_t id);
|
|
|
|
|
2014-01-31 01:49:46 +01:00
|
|
|
iterator begin() { return iterator(dummy_cfd_->next()); }
|
|
|
|
iterator end() { return iterator(dummy_cfd_); }
|
2014-01-22 20:44:53 +01:00
|
|
|
|
|
|
|
private:
|
|
|
|
std::unordered_map<std::string, uint32_t> column_families_;
|
|
|
|
std::unordered_map<uint32_t, ColumnFamilyData*> column_family_data_;
|
|
|
|
// we need to keep them alive because we still can't control the lifetime of
|
|
|
|
// all of column family data members (options for example)
|
|
|
|
std::vector<ColumnFamilyData*> droppped_column_families_;
|
|
|
|
uint32_t max_column_family_;
|
2014-01-31 01:49:46 +01:00
|
|
|
ColumnFamilyData* dummy_cfd_;
|
2014-01-22 20:44:53 +01:00
|
|
|
};
|
|
|
|
|
2014-01-28 20:05:04 +01:00
|
|
|
class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables {
|
|
|
|
public:
|
|
|
|
explicit ColumnFamilyMemTablesImpl(ColumnFamilySet* column_family_set)
|
|
|
|
: column_family_set_(column_family_set), log_number_(0) {}
|
|
|
|
|
|
|
|
// If column_family_data->log_number is bigger than log_number,
|
|
|
|
// the memtable will not be returned.
|
|
|
|
// If log_number == 0, the memtable will be always returned
|
|
|
|
void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
|
|
|
|
|
|
|
|
// Returns the column families memtable if log_number == 0 || log_number <=
|
|
|
|
// column_family_data->log_number.
|
|
|
|
// If column family doesn't exist, it asserts
|
|
|
|
virtual MemTable* GetMemTable(uint32_t column_family_id) override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
ColumnFamilySet* column_family_set_;
|
|
|
|
uint64_t log_number_;
|
|
|
|
};
|
|
|
|
|
2014-01-22 20:44:53 +01:00
|
|
|
} // namespace rocksdb
|