bits_per_key is already configurable. It defines how many bloom bits will be used for every key in the database.
My change in this patch makes the hash function that is used for blooms configurable. In fact, one can specify a modified hash function that inspects only parts of the key to generate the hash (used by blooms). Test Plan: none Differential Revision: https://reviews.facebook.net/D4059
This commit is contained in:
parent
80c663882a
commit
d11b637f34
@ -18,16 +18,27 @@ class BloomFilterPolicy : public FilterPolicy {
|
|||||||
private:
|
private:
|
||||||
size_t bits_per_key_;
|
size_t bits_per_key_;
|
||||||
size_t k_;
|
size_t k_;
|
||||||
|
uint32_t (*hash_func_)(const Slice& key);
|
||||||
|
|
||||||
public:
|
void initialize() {
|
||||||
explicit BloomFilterPolicy(int bits_per_key)
|
|
||||||
: bits_per_key_(bits_per_key) {
|
|
||||||
// We intentionally round down to reduce probing cost a little bit
|
// We intentionally round down to reduce probing cost a little bit
|
||||||
k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
|
k_ = static_cast<size_t>(bits_per_key_ * 0.69); // 0.69 =~ ln(2)
|
||||||
if (k_ < 1) k_ = 1;
|
if (k_ < 1) k_ = 1;
|
||||||
if (k_ > 30) k_ = 30;
|
if (k_ > 30) k_ = 30;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit BloomFilterPolicy(int bits_per_key,
|
||||||
|
uint32_t (*hash_func)(const Slice& key))
|
||||||
|
: bits_per_key_(bits_per_key), hash_func_(hash_func) {
|
||||||
|
initialize();
|
||||||
|
}
|
||||||
|
explicit BloomFilterPolicy(int bits_per_key)
|
||||||
|
: bits_per_key_(bits_per_key) {
|
||||||
|
hash_func_ = BloomHash;
|
||||||
|
initialize();
|
||||||
|
}
|
||||||
|
|
||||||
virtual const char* Name() const {
|
virtual const char* Name() const {
|
||||||
return "leveldb.BuiltinBloomFilter";
|
return "leveldb.BuiltinBloomFilter";
|
||||||
}
|
}
|
||||||
@ -50,7 +61,7 @@ class BloomFilterPolicy : public FilterPolicy {
|
|||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
// Use double-hashing to generate a sequence of hash values.
|
// Use double-hashing to generate a sequence of hash values.
|
||||||
// See analysis in [Kirsch,Mitzenmacher 2006].
|
// See analysis in [Kirsch,Mitzenmacher 2006].
|
||||||
uint32_t h = BloomHash(keys[i]);
|
uint32_t h = hash_func_(keys[i]);
|
||||||
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
for (size_t j = 0; j < k_; j++) {
|
for (size_t j = 0; j < k_; j++) {
|
||||||
const uint32_t bitpos = h % bits;
|
const uint32_t bitpos = h % bits;
|
||||||
@ -76,7 +87,7 @@ class BloomFilterPolicy : public FilterPolicy {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t h = BloomHash(key);
|
uint32_t h = hash_func_(key);
|
||||||
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
|
||||||
for (size_t j = 0; j < k; j++) {
|
for (size_t j = 0; j < k; j++) {
|
||||||
const uint32_t bitpos = h % bits;
|
const uint32_t bitpos = h % bits;
|
||||||
|
Loading…
Reference in New Issue
Block a user