delay initialization of cuckoo table iterator

Summary:
Creating a cuckoo table iterator is quite expensive, since it needs to
load all of the data and sort it. After compaction, RocksDB creates a
new iterator over the new file to make sure the file is in a good state,
which makes DB creation quite slow. Delay the iterator's load-and-sort
step until the first seek to speed this up.

Test Plan: db_bench

Reviewers: igor, yhchiang, sdong

Reviewed By: sdong

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D23775
This commit is contained in:
Lei Jin 2014-09-25 16:45:37 -07:00
parent 94997eab5e
commit d439451fab

View File

@ -191,7 +191,7 @@ class CuckooTableIterator : public Iterator {
Slice key() const override; Slice key() const override;
Slice value() const override; Slice value() const override;
Status status() const override { return status_; } Status status() const override { return status_; }
void LoadKeysFromReader(); void InitIfNeeded();
private: private:
struct BucketComparator { struct BucketComparator {
@ -224,6 +224,7 @@ class CuckooTableIterator : public Iterator {
const BucketComparator bucket_comparator_; const BucketComparator bucket_comparator_;
void PrepareKVAtCurrIdx(); void PrepareKVAtCurrIdx();
CuckooTableReader* reader_; CuckooTableReader* reader_;
bool initialized_;
Status status_; Status status_;
// Contains a map of keys to bucket_id sorted in key order. // Contains a map of keys to bucket_id sorted in key order.
std::vector<uint32_t> sorted_bucket_ids_; std::vector<uint32_t> sorted_bucket_ids_;
@ -240,13 +241,17 @@ CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
: bucket_comparator_(reader->file_data_, reader->ucomp_, : bucket_comparator_(reader->file_data_, reader->ucomp_,
reader->bucket_length_, reader->user_key_length_), reader->bucket_length_, reader->user_key_length_),
reader_(reader), reader_(reader),
initialized_(false),
curr_key_idx_(kInvalidIndex) { curr_key_idx_(kInvalidIndex) {
sorted_bucket_ids_.clear(); sorted_bucket_ids_.clear();
curr_value_.clear(); curr_value_.clear();
curr_key_.Clear(); curr_key_.Clear();
} }
void CuckooTableIterator::LoadKeysFromReader() { void CuckooTableIterator::InitIfNeeded() {
if (initialized_) {
return;
}
sorted_bucket_ids_.reserve(reader_->GetTableProperties()->num_entries); sorted_bucket_ids_.reserve(reader_->GetTableProperties()->num_entries);
uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1; uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1;
assert(num_buckets < kInvalidIndex); assert(num_buckets < kInvalidIndex);
@ -262,19 +267,23 @@ void CuckooTableIterator::LoadKeysFromReader() {
std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(), std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
bucket_comparator_); bucket_comparator_);
curr_key_idx_ = kInvalidIndex; curr_key_idx_ = kInvalidIndex;
initialized_ = true;
} }
void CuckooTableIterator::SeekToFirst() { void CuckooTableIterator::SeekToFirst() {
InitIfNeeded();
curr_key_idx_ = 0; curr_key_idx_ = 0;
PrepareKVAtCurrIdx(); PrepareKVAtCurrIdx();
} }
void CuckooTableIterator::SeekToLast() { void CuckooTableIterator::SeekToLast() {
InitIfNeeded();
curr_key_idx_ = sorted_bucket_ids_.size() - 1; curr_key_idx_ = sorted_bucket_ids_.size() - 1;
PrepareKVAtCurrIdx(); PrepareKVAtCurrIdx();
} }
void CuckooTableIterator::Seek(const Slice& target) { void CuckooTableIterator::Seek(const Slice& target) {
InitIfNeeded();
const BucketComparator seek_comparator( const BucketComparator seek_comparator(
reader_->file_data_, reader_->ucomp_, reader_->file_data_, reader_->ucomp_,
reader_->bucket_length_, reader_->user_key_length_, reader_->bucket_length_, reader_->user_key_length_,
@ -362,9 +371,6 @@ Iterator* CuckooTableReader::NewIterator(
auto iter_mem = arena->AllocateAligned(sizeof(CuckooTableIterator)); auto iter_mem = arena->AllocateAligned(sizeof(CuckooTableIterator));
iter = new (iter_mem) CuckooTableIterator(this); iter = new (iter_mem) CuckooTableIterator(this);
} }
if (iter->status().ok()) {
iter->LoadKeysFromReader();
}
return iter; return iter;
} }