Add a hash-index component for block
Summary: This is the key component extracted from the diff (https://reviews.facebook.net/D14271). I separated it into a dedicated patch to make the review easier. Test Plan: added a unit test and confirmed that it passes. Reviewers: haobo, sdong, dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D16245
This commit is contained in:
parent
6b9da48a03
commit
906f3dca72
4
Makefile
4
Makefile
@ -55,6 +55,7 @@ VALGRIND_OPTS = --error-exitcode=$(VALGRIND_ERROR) --leak-check=full
|
||||
|
||||
TESTS = \
|
||||
db_test \
|
||||
block_hash_index_test \
|
||||
autovector_test \
|
||||
table_properties_collector_test \
|
||||
arena_test \
|
||||
@ -227,6 +228,9 @@ $(LIBRARY): $(LIBOBJECTS)
|
||||
db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
|
||||
$(CXX) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
block_hash_index_test: table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
db_stress: tools/db_stress.o $(LIBOBJECTS) $(TESTUTIL)
|
||||
$(CXX) tools/db_stress.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
|
@ -26,15 +26,6 @@
|
||||
#include "util/statistics.h"
|
||||
#include "util/stop_watch.h"
|
||||
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<rocksdb::Slice> {
|
||||
size_t operator()(const rocksdb::Slice& slice) const {
|
||||
return MurmurHash(slice.data(), slice.size(), 0);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options)
|
||||
@ -167,7 +158,8 @@ Iterator* MemTable::NewIterator(const ReadOptions& options) {
|
||||
}
|
||||
|
||||
port::RWMutex* MemTable::GetLock(const Slice& key) {
|
||||
return &locks_[std::hash<Slice>()(key) % locks_.size()];
|
||||
static murmur_hash hash;
|
||||
return &locks_[hash(key) % locks_.size()];
|
||||
}
|
||||
|
||||
void MemTable::Add(SequenceNumber s, ValueType type,
|
||||
|
112
table/block_hash_index.cc
Normal file
112
table/block_hash_index.cc
Normal file
@ -0,0 +1,112 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "table/block_hash_index.h"
|
||||
#include "rocksdb/comparator.h"
|
||||
#include "rocksdb/iterator.h"
|
||||
#include "rocksdb/slice_transform.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// Builds a BlockHashIndex by walking the index entries and the data entries
// in lock step.
//
// Every entry produced by `index_iter` is treated as the last key of one data
// block and is numbered with a restart index starting at 0. Each run of
// consecutive data keys that `hash_key_extractor` maps to the same prefix
// yields one record: (prefix -> restart index of the first block holding it,
// number of blocks the run touches).
//
// @params index_iter: iterates the index entries; key() is used as the upper
//                     bound of the current data block.
// @params data_iter: iterates all data entries (expected to be sorted
//                    consistently with `comparator`).
// @params num_restarts: upper bound on the number of index entries consumed.
// @params comparator: compares data keys against the index keys.
// @params hash_key_extractor: must be non-null; maps a key to its prefix.
//
// Returns a heap-allocated index owned by the caller. Returns nullptr when a
// prefix cannot be added (Add() rejected it) or when either iterator still
// has entries left once the scan is over.
BlockHashIndex* CreateBlockHashIndex(Iterator* index_iter, Iterator* data_iter,
                                     const uint32_t num_restarts,
                                     const Comparator* comparator,
                                     const SliceTransform* hash_key_extractor) {
  assert(hash_key_extractor);
  assert(comparator);
  auto hash_index = new BlockHashIndex(hash_key_extractor);
  // Same width as the restart indices stored in the hash index, so assigning
  // it to pending_entry_index below never narrows.
  uint32_t current_restart_index = 0;

  std::string pending_entry_prefix;
  // pending_block_num == 0 also implies there is no entry inserted at all.
  uint32_t pending_block_num = 0;
  uint32_t pending_entry_index = 0;

  // scan all the entries and create a hash index based on their prefixes.
  data_iter->SeekToFirst();
  for (index_iter->SeekToFirst();
       index_iter->Valid() && current_restart_index < num_restarts;
       index_iter->Next()) {
    Slice last_key_in_block = index_iter->key();
    assert(data_iter->Valid() && data_iter->status().ok());

    // scan through all entries within a data block.
    while (data_iter->Valid() &&
           comparator->Compare(data_iter->key(), last_key_in_block) <= 0) {
      auto key_prefix = hash_key_extractor->Transform(data_iter->key());
      bool is_first_entry = pending_block_num == 0;

      // Keys may share the prefix
      if (is_first_entry || pending_entry_prefix != key_prefix) {
        // A new prefix starts here: flush the record of the previous one.
        if (!is_first_entry) {
          bool succeeded = hash_index->Add(
              pending_entry_prefix, pending_entry_index, pending_block_num);
          if (!succeeded) {
            delete hash_index;
            return nullptr;
          }
        }

        // update the status.
        // needs a hard copy otherwise the underlying data changes all the
        // time.
        pending_entry_prefix = key_prefix.ToString();
        pending_block_num = 1;
        pending_entry_index = current_restart_index;
      } else {
        // The block count increments when keys sharing the prefix reside in
        // different data blocks.
        auto last_restart_index = pending_entry_index + pending_block_num - 1;
        assert(last_restart_index <= current_restart_index);
        if (last_restart_index != current_restart_index) {
          ++pending_block_num;
        }
      }
      data_iter->Next();
    }

    ++current_restart_index;
  }

  // make sure all entries have been scanned.
  assert(!index_iter->Valid());
  assert(!data_iter->Valid());
  // The asserts above disappear under NDEBUG. Without this check a release
  // build would hand back an index that silently misses prefixes.
  if (index_iter->Valid() || data_iter->Valid()) {
    delete hash_index;
    return nullptr;
  }

  // Flush the record of the last prefix seen, if any.
  if (pending_block_num > 0) {
    auto succeeded = hash_index->Add(pending_entry_prefix, pending_entry_index,
                                     pending_block_num);
    if (!succeeded) {
      delete hash_index;
      return nullptr;
    }
  }

  return hash_index;
}
|
||||
|
||||
bool BlockHashIndex::Add(const Slice& prefix, uint32_t restart_index,
|
||||
uint32_t num_blocks) {
|
||||
auto prefix_ptr = arena_.Allocate(prefix.size());
|
||||
std::copy(prefix.data() /* begin */, prefix.data() + prefix.size() /* end */,
|
||||
prefix_ptr /* destination */);
|
||||
auto result =
|
||||
restart_indices_.insert({Slice(prefix_ptr, prefix.size()),
|
||||
RestartIndex(restart_index, num_blocks)});
|
||||
return result.second;
|
||||
}
|
||||
|
||||
// Looks up the record registered for the prefix of `key`.
//
// The prefix is obtained through the extractor handed to the constructor.
// Returns a pointer to the stored RestartIndex, or nullptr when no record
// exists for that prefix.
const BlockHashIndex::RestartIndex* BlockHashIndex::GetRestartIndex(
    const Slice& key) {
  const auto entry = restart_indices_.find(hash_key_extractor_->Transform(key));
  return entry != restart_indices_.end() ? &entry->second : nullptr;
}
|
||||
|
||||
} // namespace rocksdb
|
72
table/block_hash_index.h
Normal file
72
table/block_hash_index.h
Normal file
@ -0,0 +1,72 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "util/arena.h"
|
||||
#include "util/murmurhash.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
class Comparator;
|
||||
class Iterator;
|
||||
class Slice;
|
||||
class SliceTransform;
|
||||
|
||||
// Build a hash-based index to speed up the lookup for "index block".
|
||||
// BlockHashIndex accepts a key and, if found, returns its restart index within
|
||||
// that index block.
|
||||
class BlockHashIndex {
|
||||
public:
|
||||
// Represents a restart index in the index block's restart array.
|
||||
struct RestartIndex {
|
||||
explicit RestartIndex(uint32_t first_index, uint32_t num_blocks = 1)
|
||||
: first_index(first_index), num_blocks(num_blocks) {}
|
||||
|
||||
// For a given prefix, what is the restart index for the first data block
|
||||
// that contains it.
|
||||
uint32_t first_index = 0;
|
||||
|
||||
// How many data blocks contains this prefix?
|
||||
uint32_t num_blocks = 1;
|
||||
};
|
||||
|
||||
explicit BlockHashIndex(const SliceTransform* hash_key_extractor)
|
||||
: hash_key_extractor_(hash_key_extractor) {}
|
||||
|
||||
// Maps a key to its restart first_index.
|
||||
// Returns nullptr if the restart first_index is found
|
||||
const RestartIndex* GetRestartIndex(const Slice& key);
|
||||
|
||||
bool Add(const Slice& key_prefix, uint32_t restart_index,
|
||||
uint32_t num_blocks);
|
||||
|
||||
size_t ApproximateMemoryUsage() const {
|
||||
return arena_.ApproximateMemoryUsage();
|
||||
}
|
||||
|
||||
private:
|
||||
const SliceTransform* hash_key_extractor_;
|
||||
std::unordered_map<Slice, RestartIndex, murmur_hash> restart_indices_;
|
||||
Arena arena_;
|
||||
};
|
||||
|
||||
// Create hash index by scanning the entries in index as well as the whole
|
||||
// dataset.
|
||||
// @params index_iter: an iterator with the pointer to the first entry in a
|
||||
// block.
|
||||
// @params data_iter: an iterator that can scan all the entries reside in a
|
||||
// table.
|
||||
// @params num_restarts: used for correctness verification.
|
||||
// @params comparator: compares each data key against the current index key.
// @params hash_key_extractor: extract the hashable part of a given key.
|
||||
// On error, nullptr will be returned.
|
||||
BlockHashIndex* CreateBlockHashIndex(Iterator* index_iter, Iterator* data_iter,
|
||||
const uint32_t num_restarts,
|
||||
const Comparator* comparator,
|
||||
const SliceTransform* hash_key_extractor);
|
||||
|
||||
} // namespace rocksdb
|
117
table/block_hash_index_test.cc
Normal file
117
table/block_hash_index_test.cc
Normal file
@ -0,0 +1,117 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "rocksdb/comparator.h"
|
||||
#include "rocksdb/iterator.h"
|
||||
#include "rocksdb/slice_transform.h"
|
||||
#include "table/block_hash_index.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
typedef std::map<std::string, std::string> Data;
|
||||
|
||||
class MapIterator : public Iterator {
|
||||
public:
|
||||
explicit MapIterator(const Data& data) : data_(data), pos_(data_.end()) {}
|
||||
|
||||
virtual bool Valid() const { return pos_ != data_.end(); }
|
||||
|
||||
virtual void SeekToFirst() { pos_ = data_.begin(); }
|
||||
|
||||
virtual void SeekToLast() {
|
||||
pos_ = data_.end();
|
||||
--pos_;
|
||||
}
|
||||
|
||||
virtual void Seek(const Slice& target) {
|
||||
pos_ = data_.find(target.ToString());
|
||||
}
|
||||
|
||||
virtual void Next() { ++pos_; }
|
||||
|
||||
virtual void Prev() { --pos_; }
|
||||
|
||||
virtual Slice key() const { return pos_->first; }
|
||||
|
||||
virtual Slice value() const { return pos_->second; }
|
||||
|
||||
virtual Status status() const { return Status::OK(); }
|
||||
|
||||
private:
|
||||
const Data& data_;
|
||||
Data::const_iterator pos_;
|
||||
};
|
||||
|
||||
class BlockTest {};
|
||||
|
||||
TEST(BlockTest, BasicTest) {
|
||||
const size_t keys_per_block = 4;
|
||||
const size_t prefix_size = 2;
|
||||
std::vector<std::string> keys = {/* block 1 */
|
||||
"0101", "0102", "0103", "0201",
|
||||
/* block 2 */
|
||||
"0202", "0203", "0301", "0401",
|
||||
/* block 3 */
|
||||
"0501", "0601", "0701", "0801",
|
||||
/* block 4 */
|
||||
"0802", "0803", "0804", "0805",
|
||||
/* block 5 */
|
||||
"0806", "0807", "0808", "0809", };
|
||||
|
||||
Data data_entries;
|
||||
for (const auto key : keys) {
|
||||
data_entries.insert({key, key});
|
||||
}
|
||||
|
||||
Data index_entries;
|
||||
for (size_t i = 3; i < keys.size(); i += keys_per_block) {
|
||||
// simply ignore the value part
|
||||
index_entries.insert({keys[i], ""});
|
||||
}
|
||||
|
||||
MapIterator data_iter(data_entries);
|
||||
MapIterator index_iter(index_entries);
|
||||
|
||||
auto prefix_extractor = NewFixedPrefixTransform(prefix_size);
|
||||
std::unique_ptr<BlockHashIndex> block_hash_index(
|
||||
CreateBlockHashIndex(&index_iter, &data_iter, index_entries.size(),
|
||||
BytewiseComparator(), prefix_extractor));
|
||||
|
||||
std::map<std::string, BlockHashIndex::RestartIndex> expected = {
|
||||
{"01xx", BlockHashIndex::RestartIndex(0, 1)},
|
||||
{"02yy", BlockHashIndex::RestartIndex(0, 2)},
|
||||
{"03zz", BlockHashIndex::RestartIndex(1, 1)},
|
||||
{"04pp", BlockHashIndex::RestartIndex(1, 1)},
|
||||
{"05ww", BlockHashIndex::RestartIndex(2, 1)},
|
||||
{"06xx", BlockHashIndex::RestartIndex(2, 1)},
|
||||
{"07pp", BlockHashIndex::RestartIndex(2, 1)},
|
||||
{"08xz", BlockHashIndex::RestartIndex(2, 3)}, };
|
||||
|
||||
const BlockHashIndex::RestartIndex* index = nullptr;
|
||||
// search existed prefixes
|
||||
for (const auto& item : expected) {
|
||||
index = block_hash_index->GetRestartIndex(item.first);
|
||||
ASSERT_TRUE(index != nullptr);
|
||||
ASSERT_EQ(item.second.first_index, index->first_index);
|
||||
ASSERT_EQ(item.second.num_blocks, index->num_blocks);
|
||||
}
|
||||
|
||||
// search non exist prefixes
|
||||
ASSERT_TRUE(!block_hash_index->GetRestartIndex("00xx"));
|
||||
ASSERT_TRUE(!block_hash_index->GetRestartIndex("10yy"));
|
||||
ASSERT_TRUE(!block_hash_index->GetRestartIndex("20zz"));
|
||||
|
||||
delete prefix_extractor;
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
// Entry point of the test binary: runs the tests through the rocksdb test
// harness and uses the result as the process exit code. The command-line
// arguments are not used.
int main(int argc, char** argv) {
  const auto exit_code = rocksdb::test::RunAllTests();
  return exit_code;
}
|
@ -39,12 +39,12 @@ class Arena {
|
||||
// Returns an estimate of the total memory usage of data allocated
|
||||
// by the arena (exclude the space allocated but not yet used for future
|
||||
// allocations).
|
||||
const size_t ApproximateMemoryUsage() {
|
||||
size_t ApproximateMemoryUsage() const {
|
||||
return blocks_memory_ + blocks_.capacity() * sizeof(char*) -
|
||||
alloc_bytes_remaining_;
|
||||
}
|
||||
|
||||
const size_t MemoryAllocatedBytes() { return blocks_memory_; }
|
||||
size_t MemoryAllocatedBytes() const { return blocks_memory_; }
|
||||
|
||||
private:
|
||||
// Number of bytes allocated in one block
|
||||
|
@ -46,5 +46,4 @@ uint32_t Hash(const char* data, size_t n, uint32_t seed) {
|
||||
return h;
|
||||
}
|
||||
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -11,6 +11,7 @@
|
||||
*/
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include "rocksdb/slice.h"
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#define MURMUR_HASH MurmurHash64A
|
||||
@ -29,5 +30,13 @@ typedef unsigned int murmur_t;
|
||||
unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed );
|
||||
#define MurmurHash MurmurHashNeutral2
|
||||
typedef unsigned int murmur_t;
|
||||
|
||||
#endif
|
||||
|
||||
// Allow slice to be hashable by murmur hash.
|
||||
namespace rocksdb {
|
||||
struct murmur_hash {
|
||||
size_t operator()(const Slice& slice) const {
|
||||
return MurmurHash(slice.data(), slice.size(), 0);
|
||||
}
|
||||
};
|
||||
} // rocksdb
|
||||
|
Loading…
Reference in New Issue
Block a user