rocksdb/util/heap.h
lovro e1c99e10c1 Replace std::priority_queue in MergingIterator with custom heap, take 2
Summary: Repeat of b6655a679d (reverted in b7a2369fb2) with a proper fix for the issue that 57d216ea65 was trying to fix.

Test Plan:
make check

for i in $(seq 100); do ./db_stress --test_batches_snapshots=1 --threads=32 --write_buffer_size=4194304 --destroy_db_initially=0 --reopen=20 --readpercent=45 --prefixpercent=5 --writepercent=35 --delpercent=5 --iterpercent=10 --db=/tmp/rocksdb_crashtest_KdCI5F --max_key=100000000 --mmap_read=0 --block_size=16384 --cache_size=1048576 --open_files=500000 --verify_checksum=1 --sync=0 --progress_reports=0 --disable_wal=0 --disable_data_sync=1 --target_file_size_base=2097152 --target_file_size_multiplier=2 --max_write_buffer_number=3 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --filter_deletes=0 --memtablerep=prefix_hash --prefix_size=7 --ops_per_thread=200 || break; done

Reviewers: anthony, sdong, igor, yhchiang

Reviewed By: igor, yhchiang

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D41391
2015-07-15 03:34:40 -07:00

141 lines
4.0 KiB
C++

// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <utility>
#include "util/autovector.h"
namespace rocksdb {
// Binary heap implementation optimized for use in multi-way merge sort.
// Comparison to std::priority_queue:
// - In libstdc++, std::priority_queue::pop() usually performs just over logN
// comparisons but never fewer.
// - std::priority_queue does not have a replace-top operation, requiring a
// pop+push. If the replacement element is the new top, this requires
// around 2logN comparisons.
// - This heap's pop() uses a "schoolbook" downheap which requires up to ~2logN
// comparisons.
// - This heap provides a replace_top() operation which requires [1, 2logN]
// comparisons. When the replacement element is also the new top, this
// takes just 1 or 2 comparisons.
//
// The last property can yield an order-of-magnitude performance improvement
// when merge-sorting real-world non-random data. If the merge operation is
// likely to take chunks of elements from the same input stream, only 1
// comparison per element is needed. In RocksDB-land, this happens when
// compacting a database where keys are not randomly distributed across L0
// files but nearby keys are likely to be in the same L0 file.
//
// The container uses the same counterintuitive ordering as
// std::priority_queue: the comparison operator is expected to provide the
// less-than relation, but top() will return the maximum.
template<typename T, typename Compare = std::less<T>>
class BinaryHeap {
 public:
  // Constructs an empty heap using a default-constructed comparator.
  BinaryHeap() { }

  // Constructs an empty heap that orders elements with `cmp`.
  explicit BinaryHeap(Compare cmp) : cmp_(std::move(cmp)) { }

  // Appends a copy of `value` and sifts it up to its place in the heap.
  void push(const T& value) {
    data_.push_back(value);
    upheap(data_.size() - 1);
  }

  // Same as above, but moves `value` into the heap.
  void push(T&& value) {
    data_.push_back(std::move(value));
    upheap(data_.size() - 1);
  }

  // Returns the element stored at the root. The heap must not be empty.
  const T& top() const {
    assert(!empty());
    return data_.front();
  }

  // Overwrites the root with a copy of `value` and sifts it down. If `value`
  // still belongs at the root, the sift-down stops after the first level.
  // The heap must not be empty.
  void replace_top(const T& value) {
    assert(!empty());
    data_.front() = value;
    downheap(get_root());
  }

  // Same as above, but moves `value` into the root slot.
  void replace_top(T&& value) {
    assert(!empty());
    data_.front() = std::move(value);
    downheap(get_root());
  }

  // Removes the root element. The heap must not be empty.
  void pop() {
    assert(!empty());
    if (data_.size() > 1) {
      // With a single element front() and back() are the same object, so
      // moving unconditionally would be a self-move-assignment. That is not
      // guaranteed to be safe for an arbitrary T and is flagged by checked
      // standard library builds, so only move when there is a distinct last
      // element to promote.
      data_.front() = std::move(data_.back());
    }
    data_.pop_back();
    if (!empty()) {
      downheap(get_root());
    }
  }

  // Exchanges both the comparator and the stored elements with `other`.
  void swap(BinaryHeap &other) {
    std::swap(cmp_, other.cmp_);
    data_.swap(other.data_);
  }

  // Removes all elements; the comparator is left untouched.
  void clear() {
    data_.clear();
  }

  // Returns true when the heap holds no elements.
  bool empty() const {
    return data_.empty();
  }

 private:
  // Index arithmetic for the implicit binary tree stored in data_.
  // get_parent() must not be called on the root (index 0).
  static inline size_t get_root() { return 0; }
  static inline size_t get_parent(size_t index) { return (index - 1) / 2; }
  static inline size_t get_left(size_t index) { return 2 * index + 1; }
  static inline size_t get_right(size_t index) { return 2 * index + 2; }

  // Moves the element at `index` towards the root while its parent compares
  // less than it. The element is held in a temporary so each level costs one
  // move instead of a full swap.
  void upheap(size_t index) {
    T v = std::move(data_[index]);
    while (index > get_root()) {
      const size_t parent = get_parent(index);
      if (!cmp_(data_[parent], v)) {
        break;
      }
      data_[index] = std::move(data_[parent]);
      index = parent;
    }
    data_[index] = std::move(v);
  }

  // Moves the element at `index` towards the leaves while it compares less
  // than the larger of its children. Uses at most two comparisons per level:
  // one to pick the larger child, one to compare that child with the element.
  void downheap(size_t index) {
    T v = std::move(data_[index]);
    while (1) {
      const size_t left_child = get_left(index);
      if (left_child >= data_.size()) {
        // No children: `index` is a leaf.
        break;
      }
      const size_t right_child = left_child + 1;
      assert(right_child == get_right(index));
      size_t picked_child = left_child;
      if (right_child < data_.size() &&
          cmp_(data_[left_child], data_[right_child])) {
        picked_child = right_child;
      }
      if (!cmp_(v, data_[picked_child])) {
        break;
      }
      data_[index] = std::move(data_[picked_child]);
      index = picked_child;
    }
    data_[index] = std::move(v);
  }

  Compare cmp_;
  autovector<T> data_;
};
} // namespace rocksdb