rocksdb/util/dynamic_bloom.h
sdong 462796697c dynamic_bloom: replace some divide (remainder) operations with shifts in locality mode, and other improvements
Summary:
This patch changes meaning of options.bloom_locality: 0 means disable cache line optimization and any positive number means use CACHE_LINE_SIZE as block size (the previous behavior is the block size will be CACHE_LINE_SIZE*options.bloom_locality). By doing it, the divide operations inside a block can be replaced by a shift.

Performance is improved:
https://reviews.facebook.net/P471

Also, improve the basic algorithm in two ways:
(1) make sure num of blocks is an odd number
(2) rotate bytes after every probe in locality mode. Since the divider is 2^n, unless doing it, we are never able to use all the bits.
Improvements of false positive: https://reviews.facebook.net/P459

Test Plan: make all check

Reviewers: ljin, haobo

Reviewed By: haobo

Subscribers: dhruba, yhchiang, igor, leveldb

Differential Revision: https://reviews.facebook.net/D18843
2014-06-02 17:36:38 -07:00

120 lines
3.9 KiB
C++

// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#include <atomic>
#include <memory>
#include "port/port.h"
#include <util/arena.h>
namespace rocksdb {
class Slice;
class Logger;
class DynamicBloom {
public:
// total_bits: fixed total bits for the bloom
// num_probes: number of hash probes for a single key
// locality: If positive, optimize for cache line locality, 0 otherwise.
// hash_func: customized hash function
// huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB
// withi this page size. Need to reserve huge pages for
// it to be allocated, like:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt
explicit DynamicBloom(uint32_t total_bits, uint32_t locality = 0,
uint32_t num_probes = 6,
uint32_t (*hash_func)(const Slice& key) = nullptr,
size_t huge_page_tlb_size = 0,
Logger* logger = nullptr);
~DynamicBloom() {}
// Assuming single threaded access to this function.
void Add(const Slice& key);
// Assuming single threaded access to this function.
void AddHash(uint32_t hash);
// Multithreaded access to this function is OK
bool MayContain(const Slice& key);
// Multithreaded access to this function is OK
bool MayContainHash(uint32_t hash);
private:
const uint32_t kTotalBits;
const uint32_t kNumBlocks;
const uint32_t kNumProbes;
uint32_t (*hash_func_)(const Slice& key);
unsigned char* data_;
unsigned char* raw_;
Arena arena_;
};
inline void DynamicBloom::Add(const Slice& key) { AddHash(hash_func_(key)); }
inline bool DynamicBloom::MayContain(const Slice& key) {
return (MayContainHash(hash_func_(key)));
}
inline bool DynamicBloom::MayContainHash(uint32_t h) {
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
for (uint32_t i = 0; i < kNumProbes; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
if (((data_[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
return false;
}
// Rotate h so that we don't reuse the same bytes.
h = h / (CACHE_LINE_SIZE * 8) +
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
h += delta;
}
} else {
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = h % kTotalBits;
if (((data_[bitpos / 8]) & (1 << (bitpos % 8))) == 0) {
return false;
}
h += delta;
}
}
return true;
}
inline void DynamicBloom::AddHash(uint32_t h) {
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
if (kNumBlocks != 0) {
uint32_t b = ((h >> 11 | (h << 21)) % kNumBlocks) * (CACHE_LINE_SIZE * 8);
for (uint32_t i = 0; i < kNumProbes; ++i) {
// Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized
// to a simple and operation by compiler.
const uint32_t bitpos = b + (h % (CACHE_LINE_SIZE * 8));
data_[bitpos / 8] |= (1 << (bitpos % 8));
// Rotate h so that we don't reuse the same bytes.
h = h / (CACHE_LINE_SIZE * 8) +
(h % (CACHE_LINE_SIZE * 8)) * (0x20000000U / CACHE_LINE_SIZE);
h += delta;
}
} else {
for (uint32_t i = 0; i < kNumProbes; ++i) {
const uint32_t bitpos = h % kTotalBits;
data_[bitpos / 8] |= (1 << (bitpos % 8));
h += delta;
}
}
}
} // rocksdb