improve memory efficiency of cuckoo reader
Summary: When creating a new iterator, instead of storing a mapping from key to
bucket id for sorting, store only the bucket id and read the key from the
mmap'ed file based on that id. This reduces the cost from 20 bytes per entry to
only 4 bytes.

Test Plan: db_bench

Reviewers: igor, yhchiang, sdong

Reviewed By: sdong

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D23757
parent 581442d446
commit c6275956e2
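
As context for the diff below, here is a minimal standalone sketch of the idea, with made-up names (FlatTable, kKeyLen, memcmp ordering) and fixed-size buckets that store the key first; it mirrors the new sorted_bucket_ids_/BucketComparator approach but is not the RocksDB implementation:

// Sketch only: hypothetical FlatTable and a simplified comparator,
// not the RocksDB types.
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Stand-in for the mmap'ed table file: fixed-size buckets, key stored first.
struct FlatTable {
  static constexpr uint32_t kKeyLen = 8;
  static constexpr uint32_t kValueLen = 4;
  static constexpr uint32_t kBucketLen = kKeyLen + kValueLen;
  std::string data;  // plays the role of the mmap'ed region

  const char* bucket(uint32_t id) const {
    return data.data() + id * kBucketLen;
  }
};

// Comparator that reads keys directly out of the file data, so the vector
// being sorted only needs to hold 4-byte bucket ids.
struct BucketComparator {
  const FlatTable* table;
  bool operator()(uint32_t a, uint32_t b) const {
    return std::memcmp(table->bucket(a), table->bucket(b),
                       FlatTable::kKeyLen) < 0;
  }
};

int main() {
  FlatTable table;
  // Three buckets: keys "banana", "apple", "cherry" padded to 8 bytes each.
  table.data = std::string("banana\0\0" "val1"
                           "apple\0\0\0" "val2"
                           "cherry\0\0" "val3", 3 * FlatTable::kBucketLen);

  std::vector<uint32_t> sorted_bucket_ids = {0, 1, 2};  // 4 bytes per entry
  std::sort(sorted_bucket_ids.begin(), sorted_bucket_ids.end(),
            BucketComparator{&table});

  for (uint32_t id : sorted_bucket_ids) {
    // Prints apple, banana, cherry: key order without storing any keys.
    std::cout << std::string(table.bucket(id), FlatTable::kKeyLen).c_str()
              << "\n";
  }
  return 0;
}

Sorting only the ids keeps the per-entry bookkeeping at a single uint32_t; the keys themselves are never copied out of the file buffer.
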
@@ -255,6 +255,8 @@ struct CuckooTablePropertyNames {
   static const std::string kIdentityAsFirstHash;
   // Indicate if using module or bit and to calculate hash value
   static const std::string kUseModuleHash;
+  // Fixed user key length
+  static const std::string kUserKeyLength;
 };
 
 struct CuckooTableOptions {

@@ -39,6 +39,8 @@ const std::string CuckooTablePropertyNames::kIdentityAsFirstHash =
     "rocksdb.cuckoo.hash.identityfirst";
 const std::string CuckooTablePropertyNames::kUseModuleHash =
     "rocksdb.cuckoo.hash.usemodule";
+const std::string CuckooTablePropertyNames::kUserKeyLength =
+    "rocksdb.cuckoo.hash.userkeylength";
 
 // Obtained by running echo rocksdb.table.cuckoo | sha1sum
 extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;

@@ -280,6 +282,11 @@ Status CuckooTableBuilder::Finish() {
       CuckooTablePropertyNames::kUseModuleHash].assign(
       reinterpret_cast<const char*>(&use_module_hash_),
       sizeof(use_module_hash_));
+  uint32_t user_key_len = static_cast<uint32_t>(smallest_user_key_.size());
+  properties_.user_collected_properties[
+      CuckooTablePropertyNames::kUserKeyLength].assign(
+      reinterpret_cast<const char*>(&user_key_len),
+      sizeof(user_key_len));
 
   // Write meta blocks.
   MetaIndexBuilder meta_index_builder;

@@ -16,6 +16,7 @@
 #include <utility>
 #include <vector>
 #include "rocksdb/iterator.h"
+#include "rocksdb/table.h"
 #include "table/meta_blocks.h"
 #include "table/cuckoo_table_factory.h"
 #include "util/arena.h"

@@ -23,7 +24,8 @@
 
 namespace rocksdb {
 namespace {
-static const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1);
+const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1);
+const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
 }
 
 extern const uint64_t kCuckooTableMagicNumber;

@@ -62,6 +64,14 @@ CuckooTableReader::CuckooTableReader(
   unused_key_ = unused_key->second;
 
   key_length_ = props->fixed_key_len;
+  auto user_key_len = user_props.find(CuckooTablePropertyNames::kUserKeyLength);
+  if (user_key_len == user_props.end()) {
+    status_ = Status::Corruption("User key length not found");
+    return;
+  }
+  user_key_length_ = *reinterpret_cast<const uint32_t*>(
+      user_key_len->second.data());
+
   auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength);
   if (value_length == user_props.end()) {
     status_ = Status::Corruption("Value length not found");

@@ -104,7 +114,6 @@ CuckooTableReader::CuckooTableReader(
   }
   use_module_hash_ = *reinterpret_cast<const bool*>(
       use_module_hash->second.data());
-  fprintf(stderr, "use_module_hash %d\n", use_module_hash_);
   auto cuckoo_block_size = user_props.find(
       CuckooTablePropertyNames::kCuckooBlockSize);
   if (cuckoo_block_size == user_props.end()) {

@@ -185,30 +194,39 @@ class CuckooTableIterator : public Iterator {
   void LoadKeysFromReader();
 
  private:
-  struct CompareKeys {
-    CompareKeys(const Comparator* ucomp, const bool last_level)
-      : ucomp_(ucomp),
-        is_last_level_(last_level) {}
-    bool operator()(const std::pair<Slice, uint32_t>& first,
-                    const std::pair<Slice, uint32_t>& second) const {
-      if (is_last_level_) {
-        return ucomp_->Compare(first.first, second.first) < 0;
-      } else {
-        return ucomp_->Compare(ExtractUserKey(first.first),
-                               ExtractUserKey(second.first)) < 0;
-      }
+  struct BucketComparator {
+    BucketComparator(const Slice file_data, const Comparator* ucomp,
+                     uint32_t bucket_len, uint32_t user_key_len,
+                     const Slice target = Slice())
+      : file_data_(file_data),
+        ucomp_(ucomp),
+        bucket_len_(bucket_len),
+        user_key_len_(user_key_len),
+        target_(target) {}
+    bool operator()(const uint32_t first, const uint32_t second) const {
+      const char* first_bucket =
+        (first == kInvalidIndex) ? target_.data() :
+                                   &file_data_.data()[first * bucket_len_];
+      const char* second_bucket =
+        (second == kInvalidIndex) ? target_.data() :
+                                    &file_data_.data()[second * bucket_len_];
+      return ucomp_->Compare(Slice(first_bucket, user_key_len_),
+                             Slice(second_bucket, user_key_len_)) < 0;
     }
 
    private:
+    const Slice file_data_;
     const Comparator* ucomp_;
-    const bool is_last_level_;
+    const uint32_t bucket_len_;
+    const uint32_t user_key_len_;
+    const Slice target_;
   };
-  const CompareKeys comparator_;
-
+  const BucketComparator bucket_comparator_;
   void PrepareKVAtCurrIdx();
   CuckooTableReader* reader_;
   Status status_;
   // Contains a map of keys to bucket_id sorted in key order.
-  std::vector<std::pair<Slice, uint32_t>> key_to_bucket_id_;
+  std::vector<uint32_t> sorted_bucket_ids_;
+  // We assume that the number of items can be stored in uint32 (4 Billion).
   uint32_t curr_key_idx_;
   Slice curr_value_;

@@ -219,29 +237,31 @@ class CuckooTableIterator : public Iterator {
 };
 
 CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
-  : comparator_(reader->ucomp_, reader->is_last_level_),
+  : bucket_comparator_(reader->file_data_, reader->ucomp_,
+        reader->bucket_length_, reader->user_key_length_),
     reader_(reader),
-    curr_key_idx_(std::numeric_limits<int32_t>::max()) {
-  key_to_bucket_id_.clear();
+    curr_key_idx_(kInvalidIndex) {
+  sorted_bucket_ids_.clear();
   curr_value_.clear();
   curr_key_.Clear();
 }
 
 void CuckooTableIterator::LoadKeysFromReader() {
-  key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries);
+  sorted_bucket_ids_.reserve(reader_->GetTableProperties()->num_entries);
   uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1;
-  for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) {
-    Slice read_key;
-    status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_,
-        reader_->key_length_, &read_key, nullptr);
-    if (read_key != Slice(reader_->unused_key_)) {
-      key_to_bucket_id_.push_back(std::make_pair(read_key, bucket_id));
+  assert(num_buckets < kInvalidIndex);
+  const char* bucket = reader_->file_data_.data();
+  for (uint32_t bucket_id = 0; bucket_id < num_buckets; ++bucket_id) {
+    if (Slice(bucket, reader_->key_length_) != Slice(reader_->unused_key_)) {
+      sorted_bucket_ids_.push_back(bucket_id);
     }
+    bucket += reader_->bucket_length_;
   }
-  assert(key_to_bucket_id_.size() ==
+  assert(sorted_bucket_ids_.size() ==
       reader_->GetTableProperties()->num_entries);
-  std::sort(key_to_bucket_id_.begin(), key_to_bucket_id_.end(), comparator_);
-  curr_key_idx_ = key_to_bucket_id_.size();
+  std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
+            bucket_comparator_);
+  curr_key_idx_ = kInvalidIndex;
 }
 
 void CuckooTableIterator::SeekToFirst() {

@@ -250,25 +270,25 @@ void CuckooTableIterator::SeekToFirst() {
 }
 
 void CuckooTableIterator::SeekToLast() {
-  curr_key_idx_ = key_to_bucket_id_.size() - 1;
+  curr_key_idx_ = sorted_bucket_ids_.size() - 1;
   PrepareKVAtCurrIdx();
 }
 
 void CuckooTableIterator::Seek(const Slice& target) {
-  // We assume that the target is an internal key. If this is last level file,
-  // we need to take only the user key part to seek.
-  Slice target_to_search = reader_->is_last_level_ ?
-    ExtractUserKey(target) : target;
-  auto seek_it = std::lower_bound(key_to_bucket_id_.begin(),
-      key_to_bucket_id_.end(),
-      std::make_pair(target_to_search, 0),
-      comparator_);
-  curr_key_idx_ = std::distance(key_to_bucket_id_.begin(), seek_it);
+  const BucketComparator seek_comparator(
+      reader_->file_data_, reader_->ucomp_,
+      reader_->bucket_length_, reader_->user_key_length_,
+      ExtractUserKey(target));
+  auto seek_it = std::lower_bound(sorted_bucket_ids_.begin(),
+                                  sorted_bucket_ids_.end(),
+                                  kInvalidIndex,
+                                  seek_comparator);
+  curr_key_idx_ = std::distance(sorted_bucket_ids_.begin(), seek_it);
   PrepareKVAtCurrIdx();
 }
 
 bool CuckooTableIterator::Valid() const {
-  return curr_key_idx_ < key_to_bucket_id_.size();
+  return curr_key_idx_ < sorted_bucket_ids_.size();
 }
 
 void CuckooTableIterator::PrepareKVAtCurrIdx() {

@@ -277,15 +297,17 @@ void CuckooTableIterator::PrepareKVAtCurrIdx() {
     curr_key_.Clear();
     return;
   }
-  uint64_t offset = ((uint64_t) key_to_bucket_id_[curr_key_idx_].second
-      * reader_->bucket_length_) + reader_->key_length_;
-  status_ = reader_->file_->Read(offset, reader_->value_length_,
-      &curr_value_, nullptr);
+  uint32_t id = sorted_bucket_ids_[curr_key_idx_];
+  const char* offset = reader_->file_data_.data() +
+      id * reader_->bucket_length_;
   if (reader_->is_last_level_) {
     // Always return internal key.
-    curr_key_.SetInternalKey(
-        key_to_bucket_id_[curr_key_idx_].first, 0, kTypeValue);
+    curr_key_.SetInternalKey(Slice(offset, reader_->user_key_length_),
+        0, kTypeValue);
+  } else {
+    curr_key_.SetKey(Slice(offset, reader_->key_length_));
   }
+  curr_value_ = Slice(offset + reader_->key_length_, reader_->value_length_);
 }
 
 void CuckooTableIterator::Next() {

@@ -300,7 +322,7 @@ void CuckooTableIterator::Next() {
 
 void CuckooTableIterator::Prev() {
   if (curr_key_idx_ == 0) {
-    curr_key_idx_ = key_to_bucket_id_.size();
+    curr_key_idx_ = sorted_bucket_ids_.size();
   }
   if (!Valid()) {
     curr_value_.clear();

@@ -313,11 +335,7 @@ void CuckooTableIterator::Prev() {
 
 Slice CuckooTableIterator::key() const {
   assert(Valid());
-  if (reader_->is_last_level_) {
-    return curr_key_.GetKey();
-  } else {
-    return key_to_bucket_id_[curr_key_idx_].first;
-  }
+  return curr_key_.GetKey();
 }
 
 Slice CuckooTableIterator::value() const {

@@ -71,6 +71,7 @@ class CuckooTableReader: public TableReader {
   uint32_t num_hash_func_;
   std::string unused_key_;
   uint32_t key_length_;
+  uint32_t user_key_length_;
   uint32_t value_length_;
   uint32_t bucket_length_;
   uint32_t cuckoo_block_size_;
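
A detail worth noting in the new Seek(): rather than materializing a temporary entry for the target key, it passes the sentinel id kInvalidIndex to std::lower_bound and lets the comparator substitute the target for that sentinel. Below is a simplified standalone sketch of the same pattern, using made-up types (SeekComparator, a plain vector of strings instead of the mmap'ed file); it is not the RocksDB code:

// Sketch of the sentinel-id trick: std::lower_bound over bucket ids,
// where a reserved id stands for "the target key".
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

constexpr uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();

struct SeekComparator {
  const std::vector<std::string>* keys;  // stand-in for keys read from mmap
  std::string target;                    // the key bound to the sentinel id

  const std::string& KeyFor(uint32_t id) const {
    return (id == kInvalidIndex) ? target : (*keys)[id];
  }
  bool operator()(uint32_t a, uint32_t b) const {
    return KeyFor(a) < KeyFor(b);
  }
};

int main() {
  // Keys addressed by bucket id; the id list below is already in key order.
  std::vector<std::string> keys = {"cherry", "apple", "banana"};
  std::vector<uint32_t> sorted_ids = {1, 2, 0};  // apple, banana, cherry

  // Seek("banana"): lower_bound compares stored ids against the sentinel,
  // which the comparator resolves to the target key.
  SeekComparator cmp{&keys, "banana"};
  auto it = std::lower_bound(sorted_ids.begin(), sorted_ids.end(),
                             kInvalidIndex, cmp);
  if (it != sorted_ids.end()) {
    std::cout << "first key >= target: " << keys[*it] << "\n";  // banana
  }
  return 0;
}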