handle kDelete type in cuckoo builder
Summary: when I changed std::vector<std::string, std::string> to std::string to store key/value pairs in builder, I missed the handling for kDeletion type. As a result, value_size_ can be wrong if the first add key is for deletion. The is captured by ./cuckoo_table_db_test Test Plan: ./cuckoo_table_db_test ./cuckoo_table_reader_test ./cuckoo_table_builder_test Reviewers: sdong, yhchiang, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D24045
This commit is contained in:
parent
389edb6b1b
commit
2dc6f62bb9
@ -60,9 +60,11 @@ CuckooTableBuilder::CuckooTableBuilder(
|
|||||||
hash_table_size_(use_module_hash ? 0 : 2),
|
hash_table_size_(use_module_hash ? 0 : 2),
|
||||||
is_last_level_file_(false),
|
is_last_level_file_(false),
|
||||||
has_seen_first_key_(false),
|
has_seen_first_key_(false),
|
||||||
|
has_seen_first_value_(false),
|
||||||
key_size_(0),
|
key_size_(0),
|
||||||
value_size_(0),
|
value_size_(0),
|
||||||
num_entries_(0),
|
num_entries_(0),
|
||||||
|
num_values_(0),
|
||||||
ucomp_(user_comparator),
|
ucomp_(user_comparator),
|
||||||
use_module_hash_(use_module_hash),
|
use_module_hash_(use_module_hash),
|
||||||
identity_as_first_hash_(identity_as_first_hash),
|
identity_as_first_hash_(identity_as_first_hash),
|
||||||
@ -84,6 +86,12 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
|||||||
status_ = Status::Corruption("Unable to parse key into inernal key.");
|
status_ = Status::Corruption("Unable to parse key into inernal key.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (ikey.type != kTypeDeletion && ikey.type != kTypeValue) {
|
||||||
|
status_ = Status::NotSupported("Unsupported key type " +
|
||||||
|
std::to_string(ikey.type));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine if we can ignore the sequence number and value type from
|
// Determine if we can ignore the sequence number and value type from
|
||||||
// internal keys by looking at sequence number from first key. We assume
|
// internal keys by looking at sequence number from first key. We assume
|
||||||
// that if first key has a zero sequence number, then all the remaining
|
// that if first key has a zero sequence number, then all the remaining
|
||||||
@ -94,16 +102,38 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
|||||||
smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
|
smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
|
||||||
largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
|
largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
|
||||||
key_size_ = is_last_level_file_ ? ikey.user_key.size() : key.size();
|
key_size_ = is_last_level_file_ ? ikey.user_key.size() : key.size();
|
||||||
value_size_ = value.size();
|
}
|
||||||
|
if (key_size_ != (is_last_level_file_ ? ikey.user_key.size() : key.size())) {
|
||||||
|
status_ = Status::NotSupported("all keys have to be the same size");
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
// Even if one sequence number is non-zero, then it is not last level.
|
// Even if one sequence number is non-zero, then it is not last level.
|
||||||
assert(!is_last_level_file_ || ikey.sequence == 0);
|
assert(!is_last_level_file_ || ikey.sequence == 0);
|
||||||
if (is_last_level_file_) {
|
|
||||||
kvs_.append(ikey.user_key.data(), ikey.user_key.size());
|
if (ikey.type == kTypeValue) {
|
||||||
|
if (!has_seen_first_value_) {
|
||||||
|
has_seen_first_value_ = true;
|
||||||
|
value_size_ = value.size();
|
||||||
|
}
|
||||||
|
if (value_size_ != value.size()) {
|
||||||
|
status_ = Status::NotSupported("all values have to be the same size");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_last_level_file_) {
|
||||||
|
kvs_.append(ikey.user_key.data(), ikey.user_key.size());
|
||||||
|
} else {
|
||||||
|
kvs_.append(key.data(), key.size());
|
||||||
|
}
|
||||||
|
kvs_.append(value.data(), value.size());
|
||||||
|
++num_values_;
|
||||||
} else {
|
} else {
|
||||||
kvs_.append(key.data(), key.size());
|
if (is_last_level_file_) {
|
||||||
|
deleted_keys_.append(ikey.user_key.data(), ikey.user_key.size());
|
||||||
|
} else {
|
||||||
|
deleted_keys_.append(key.data(), key.size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
kvs_.append(value.data(), value.size());
|
|
||||||
++num_entries_;
|
++num_entries_;
|
||||||
|
|
||||||
// In order to fill the empty buckets in the hash table, we identify a
|
// In order to fill the empty buckets in the hash table, we identify a
|
||||||
@ -123,15 +153,30 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool CuckooTableBuilder::IsDeletedKey(uint64_t idx) const {
|
||||||
|
assert(closed_);
|
||||||
|
return idx >= num_values_;
|
||||||
|
}
|
||||||
|
|
||||||
Slice CuckooTableBuilder::GetKey(uint64_t idx) const {
|
Slice CuckooTableBuilder::GetKey(uint64_t idx) const {
|
||||||
|
assert(closed_);
|
||||||
|
if (IsDeletedKey(idx)) {
|
||||||
|
return Slice(&deleted_keys_[(idx - num_values_) * key_size_], key_size_);
|
||||||
|
}
|
||||||
return Slice(&kvs_[idx * (key_size_ + value_size_)], key_size_);
|
return Slice(&kvs_[idx * (key_size_ + value_size_)], key_size_);
|
||||||
}
|
}
|
||||||
|
|
||||||
Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const {
|
Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const {
|
||||||
|
assert(closed_);
|
||||||
return is_last_level_file_ ? GetKey(idx) : ExtractUserKey(GetKey(idx));
|
return is_last_level_file_ ? GetKey(idx) : ExtractUserKey(GetKey(idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
Slice CuckooTableBuilder::GetValue(uint64_t idx) const {
|
Slice CuckooTableBuilder::GetValue(uint64_t idx) const {
|
||||||
|
assert(closed_);
|
||||||
|
if (IsDeletedKey(idx)) {
|
||||||
|
static std::string empty_value(value_size_, 'a');
|
||||||
|
return Slice(empty_value);
|
||||||
|
}
|
||||||
return Slice(&kvs_[idx * (key_size_ + value_size_) + key_size_], value_size_);
|
return Slice(&kvs_[idx * (key_size_ + value_size_) + key_size_], value_size_);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -256,7 +301,9 @@ Status CuckooTableBuilder::Finish() {
|
|||||||
++num_added;
|
++num_added;
|
||||||
s = file_->Append(GetKey(bucket.vector_idx));
|
s = file_->Append(GetKey(bucket.vector_idx));
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
s = file_->Append(GetValue(bucket.vector_idx));
|
if (value_size_ > 0) {
|
||||||
|
s = file_->Append(GetValue(bucket.vector_idx));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
|
@ -75,6 +75,7 @@ class CuckooTableBuilder: public TableBuilder {
|
|||||||
uint64_t* bucket_id);
|
uint64_t* bucket_id);
|
||||||
Status MakeHashTable(std::vector<CuckooBucket>* buckets);
|
Status MakeHashTable(std::vector<CuckooBucket>* buckets);
|
||||||
|
|
||||||
|
inline bool IsDeletedKey(uint64_t idx) const;
|
||||||
inline Slice GetKey(uint64_t idx) const;
|
inline Slice GetKey(uint64_t idx) const;
|
||||||
inline Slice GetUserKey(uint64_t idx) const;
|
inline Slice GetUserKey(uint64_t idx) const;
|
||||||
inline Slice GetValue(uint64_t idx) const;
|
inline Slice GetValue(uint64_t idx) const;
|
||||||
@ -88,14 +89,18 @@ class CuckooTableBuilder: public TableBuilder {
|
|||||||
uint64_t hash_table_size_;
|
uint64_t hash_table_size_;
|
||||||
bool is_last_level_file_;
|
bool is_last_level_file_;
|
||||||
bool has_seen_first_key_;
|
bool has_seen_first_key_;
|
||||||
|
bool has_seen_first_value_;
|
||||||
uint64_t key_size_;
|
uint64_t key_size_;
|
||||||
uint64_t value_size_;
|
uint64_t value_size_;
|
||||||
// A list of fixed-size key-value pairs concatenating into a string.
|
// A list of fixed-size key-value pairs concatenating into a string.
|
||||||
// Use GetKey(), GetUserKey(), and GetValue() to retrieve a specific
|
// Use GetKey(), GetUserKey(), and GetValue() to retrieve a specific
|
||||||
// key / value given an index
|
// key / value given an index
|
||||||
std::string kvs_;
|
std::string kvs_;
|
||||||
// Number of key-value pairs stored in kvs_
|
std::string deleted_keys_;
|
||||||
|
// Number of key-value pairs stored in kvs_ + number of deleted keys
|
||||||
uint64_t num_entries_;
|
uint64_t num_entries_;
|
||||||
|
// Number of keys that contain value (non-deletion op)
|
||||||
|
uint64_t num_values_;
|
||||||
Status status_;
|
Status status_;
|
||||||
TableProperties properties_;
|
TableProperties properties_;
|
||||||
const Comparator* ucomp_;
|
const Comparator* ucomp_;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user