Enable hash index for block-based table
Summary: Based on previous patches, this diff eventually provides the end-to-end mechanism for users to specify the hash-index. Test Plan: Wrote several new unit tests. Reviewers: sdong, haobo, dhruba Reviewed By: sdong CC: leveldb Differential Revision: https://reviews.facebook.net/D16539
This commit is contained in:
parent
7a92537fc4
commit
75b59d5146
@ -266,6 +266,8 @@ class DBTest {
|
|||||||
// Sequence of option configurations to try
|
// Sequence of option configurations to try
|
||||||
enum OptionConfig {
|
enum OptionConfig {
|
||||||
kDefault,
|
kDefault,
|
||||||
|
kBlockBasedTableWithPrefixHashIndex,
|
||||||
|
kBlockBasedTableWithWholeKeyHashIndex,
|
||||||
kPlainTableFirstBytePrefix,
|
kPlainTableFirstBytePrefix,
|
||||||
kPlainTableAllBytesPrefix,
|
kPlainTableAllBytesPrefix,
|
||||||
kVectorRep,
|
kVectorRep,
|
||||||
@ -303,7 +305,8 @@ class DBTest {
|
|||||||
kSkipDeletesFilterFirst = 1,
|
kSkipDeletesFilterFirst = 1,
|
||||||
kSkipUniversalCompaction = 2,
|
kSkipUniversalCompaction = 2,
|
||||||
kSkipMergePut = 4,
|
kSkipMergePut = 4,
|
||||||
kSkipPlainTable = 8
|
kSkipPlainTable = 8,
|
||||||
|
kSkipHashIndex = 16
|
||||||
};
|
};
|
||||||
|
|
||||||
DBTest() : option_config_(kDefault),
|
DBTest() : option_config_(kDefault),
|
||||||
@ -343,6 +346,12 @@ class DBTest {
|
|||||||
|| option_config_ == kPlainTableFirstBytePrefix)) {
|
|| option_config_ == kPlainTableFirstBytePrefix)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if ((skip_mask & kSkipHashIndex) &&  // hash-index configs have their own skip bit
|
||||||
|
(option_config_ == kBlockBasedTableWithPrefixHashIndex ||
|
||||||
|
option_config_ == kBlockBasedTableWithWholeKeyHashIndex)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -439,6 +448,20 @@ class DBTest {
|
|||||||
case kInfiniteMaxOpenFiles:
|
case kInfiniteMaxOpenFiles:
|
||||||
options.max_open_files = -1;
|
options.max_open_files = -1;
|
||||||
break;
|
break;
|
||||||
|
case kBlockBasedTableWithPrefixHashIndex: {
|
||||||
|
BlockBasedTableOptions table_options;
|
||||||
|
table_options.index_type = BlockBasedTableOptions::kHashSearch;
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case kBlockBasedTableWithWholeKeyHashIndex: {
|
||||||
|
BlockBasedTableOptions table_options;
|
||||||
|
table_options.index_type = BlockBasedTableOptions::kHashSearch;
|
||||||
|
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||||
|
options.prefix_extractor.reset(NewNoopTransform());
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1363,7 +1386,7 @@ TEST(DBTest, KeyMayExist) {
|
|||||||
|
|
||||||
// KeyMayExist function only checks data in block caches, which is not used
|
// KeyMayExist function only checks data in block caches, which is not used
|
||||||
// by plain table format.
|
// by plain table format.
|
||||||
} while (ChangeOptions(kSkipPlainTable));
|
} while (ChangeOptions(kSkipPlainTable | kSkipHashIndex));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DBTest, NonBlockingIteration) {
|
TEST(DBTest, NonBlockingIteration) {
|
||||||
@ -6184,7 +6207,9 @@ TEST(DBTest, Randomized) {
|
|||||||
int minimum = 0;
|
int minimum = 0;
|
||||||
if (option_config_ == kHashSkipList ||
|
if (option_config_ == kHashSkipList ||
|
||||||
option_config_ == kHashLinkList ||
|
option_config_ == kHashLinkList ||
|
||||||
option_config_ == kPlainTableFirstBytePrefix) {
|
option_config_ == kPlainTableFirstBytePrefix ||
|
||||||
|
option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
|
||||||
|
option_config_ == kBlockBasedTableWithPrefixHashIndex) {
|
||||||
minimum = 1;
|
minimum = 1;
|
||||||
}
|
}
|
||||||
if (p < 45) { // Put
|
if (p < 45) { // Put
|
||||||
@ -6224,8 +6249,15 @@ TEST(DBTest, Randomized) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ((step % 100) == 0) {
|
if ((step % 100) == 0) {
|
||||||
|
// For DB instances that use the hash index + block-based table, the
|
||||||
|
// iterator becomes invalid when seeking a non-existent key, rather
|
||||||
|
// than returning a key that is close to it.
|
||||||
|
if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
|
||||||
|
option_config_ != kBlockBasedTableWithPrefixHashIndex) {
|
||||||
ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
|
ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
|
||||||
ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
|
ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
|
||||||
|
}
|
||||||
|
|
||||||
// Save a snapshot from each DB this time that we'll use next
|
// Save a snapshot from each DB this time that we'll use next
|
||||||
// time we compare things, to make sure the current state is
|
// time we compare things, to make sure the current state is
|
||||||
// preserved with the snapshot
|
// preserved with the snapshot
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
#include "rocksdb/filter_policy.h"
|
#include "rocksdb/filter_policy.h"
|
||||||
#include "rocksdb/slice.h"
|
#include "rocksdb/slice.h"
|
||||||
|
#include "rocksdb/slice_transform.h"
|
||||||
#include "rocksdb/table.h"
|
#include "rocksdb/table.h"
|
||||||
#include "rocksdb/types.h"
|
#include "rocksdb/types.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
@ -304,4 +305,34 @@ class IterKey {
|
|||||||
void operator=(const IterKey&) = delete;
|
void operator=(const IterKey&) = delete;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class InternalKeySliceTransform : public SliceTransform {
|
||||||
|
public:
|
||||||
|
explicit InternalKeySliceTransform(const SliceTransform* transform)
|
||||||
|
: transform_(transform) {}
|
||||||
|
|
||||||
|
virtual const char* Name() const { return transform_->Name(); }
|
||||||
|
|
||||||
|
virtual Slice Transform(const Slice& src) const {
|
||||||
|
auto user_key = ExtractUserKey(src);
|
||||||
|
return transform_->Transform(user_key);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool InDomain(const Slice& src) const {
|
||||||
|
auto user_key = ExtractUserKey(src);
|
||||||
|
return transform_->InDomain(user_key);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool InRange(const Slice& dst) const {
|
||||||
|
auto user_key = ExtractUserKey(dst);
|
||||||
|
return transform_->InRange(user_key);
|
||||||
|
}
|
||||||
|
|
||||||
|
const SliceTransform* user_prefix_extractor() const { return transform_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Like comparator, InternalKeySliceTransform will not take care of the
|
||||||
|
// deletion of transform_
|
||||||
|
const SliceTransform* const transform_;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -60,6 +60,12 @@ struct BlockBasedTableOptions {
|
|||||||
// A space efficient index block that is optimized for
|
// A space efficient index block that is optimized for
|
||||||
// binary-search-based index.
|
// binary-search-based index.
|
||||||
kBinarySearch,
|
kBinarySearch,
|
||||||
|
|
||||||
|
// The hash index, if enabled, will do the hash lookup when
|
||||||
|
// `ReadOptions.prefix_seek == true`. User should also specify
|
||||||
|
// `Options.prefix_extractor` to allow the index block to correctly
|
||||||
|
// extract the prefix of the given key and perform hash table lookup.
|
||||||
|
kHashSearch,
|
||||||
};
|
};
|
||||||
|
|
||||||
IndexType index_type = kBinarySearch;
|
IndexType index_type = kBinarySearch;
|
||||||
|
119
table/block.cc
119
table/block.cc
@ -11,16 +11,20 @@
|
|||||||
|
|
||||||
#include "table/block.h"
|
#include "table/block.h"
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "rocksdb/comparator.h"
|
#include "rocksdb/comparator.h"
|
||||||
|
#include "table/block_hash_index.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
inline uint32_t Block::NumRestarts() const {
|
uint32_t Block::NumRestarts() const {
|
||||||
assert(size_ >= 2*sizeof(uint32_t));
|
assert(size_ >= 2*sizeof(uint32_t));
|
||||||
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
|
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
@ -92,6 +96,7 @@ class Block::Iter : public Iterator {
|
|||||||
std::string key_;
|
std::string key_;
|
||||||
Slice value_;
|
Slice value_;
|
||||||
Status status_;
|
Status status_;
|
||||||
|
BlockHashIndex* hash_index_;
|
||||||
|
|
||||||
inline int Compare(const Slice& a, const Slice& b) const {
|
inline int Compare(const Slice& a, const Slice& b) const {
|
||||||
return comparator_->Compare(a, b);
|
return comparator_->Compare(a, b);
|
||||||
@ -118,16 +123,15 @@ class Block::Iter : public Iterator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Iter(const Comparator* comparator,
|
Iter(const Comparator* comparator, const char* data, uint32_t restarts,
|
||||||
const char* data,
|
uint32_t num_restarts, BlockHashIndex* hash_index)
|
||||||
uint32_t restarts,
|
|
||||||
uint32_t num_restarts)
|
|
||||||
: comparator_(comparator),
|
: comparator_(comparator),
|
||||||
data_(data),
|
data_(data),
|
||||||
restarts_(restarts),
|
restarts_(restarts),
|
||||||
num_restarts_(num_restarts),
|
num_restarts_(num_restarts),
|
||||||
current_(restarts_),
|
current_(restarts_),
|
||||||
restart_index_(num_restarts_) {
|
restart_index_(num_restarts_),
|
||||||
|
hash_index_(hash_index) {
|
||||||
assert(num_restarts_ > 0);
|
assert(num_restarts_ > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -169,45 +173,22 @@ class Block::Iter : public Iterator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
virtual void Seek(const Slice& target) {
|
virtual void Seek(const Slice& target) {
|
||||||
// Binary search in restart array to find the first restart point
|
uint32_t index = 0;
|
||||||
// with a key >= target
|
bool ok = hash_index_ ? HashSeek(target, &index)
|
||||||
uint32_t left = 0;
|
: BinarySeek(target, 0, num_restarts_ - 1, &index);
|
||||||
uint32_t right = num_restarts_ - 1;
|
|
||||||
while (left < right) {
|
if (!ok) {
|
||||||
uint32_t mid = (left + right + 1) / 2;
|
|
||||||
uint32_t region_offset = GetRestartPoint(mid);
|
|
||||||
uint32_t shared, non_shared, value_length;
|
|
||||||
const char* key_ptr = DecodeEntry(data_ + region_offset,
|
|
||||||
data_ + restarts_,
|
|
||||||
&shared, &non_shared, &value_length);
|
|
||||||
if (key_ptr == nullptr || (shared != 0)) {
|
|
||||||
CorruptionError();
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Slice mid_key(key_ptr, non_shared);
|
SeekToRestartPoint(index);
|
||||||
if (Compare(mid_key, target) < 0) {
|
|
||||||
// Key at "mid" is smaller than "target". Therefore all
|
|
||||||
// blocks before "mid" are uninteresting.
|
|
||||||
left = mid;
|
|
||||||
} else {
|
|
||||||
// Key at "mid" is >= "target". Therefore all blocks at or
|
|
||||||
// after "mid" are uninteresting.
|
|
||||||
right = mid - 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Linear search (within restart block) for first key >= target
|
// Linear search (within restart block) for first key >= target
|
||||||
SeekToRestartPoint(left);
|
|
||||||
while (true) {
|
|
||||||
if (!ParseNextKey()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (Compare(key_, target) >= 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
if (!ParseNextKey() || Compare(key_, target) >= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
virtual void SeekToFirst() {
|
virtual void SeekToFirst() {
|
||||||
SeekToRestartPoint(0);
|
SeekToRestartPoint(0);
|
||||||
ParseNextKey();
|
ParseNextKey();
|
||||||
@ -257,6 +238,53 @@ class Block::Iter : public Iterator {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Binary search in restart array to find the first restart point
|
||||||
|
// with a key >= target
|
||||||
|
bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
|
||||||
|
uint32_t* index) {
|
||||||
|
assert(left <= right);
|
||||||
|
|
||||||
|
while (left < right) {
|
||||||
|
uint32_t mid = (left + right + 1) / 2;
|
||||||
|
uint32_t region_offset = GetRestartPoint(mid);
|
||||||
|
uint32_t shared, non_shared, value_length;
|
||||||
|
const char* key_ptr =
|
||||||
|
DecodeEntry(data_ + region_offset, data_ + restarts_, &shared,
|
||||||
|
&non_shared, &value_length);
|
||||||
|
if (key_ptr == nullptr || (shared != 0)) {
|
||||||
|
CorruptionError();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Slice mid_key(key_ptr, non_shared);
|
||||||
|
if (Compare(mid_key, target) < 0) {
|
||||||
|
// Key at "mid" is smaller than "target". Therefore all
|
||||||
|
// blocks before "mid" are uninteresting.
|
||||||
|
left = mid;
|
||||||
|
} else {
|
||||||
|
// Key at "mid" is >= "target". Therefore all blocks at or
|
||||||
|
// after "mid" are uninteresting.
|
||||||
|
right = mid - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*index = left;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HashSeek(const Slice& target, uint32_t* index) {
|
||||||
|
assert(hash_index_);
|
||||||
|
auto restart_index = hash_index_->GetRestartIndex(target);
|
||||||
|
if (restart_index == nullptr) {
|
||||||
|
current_ = restarts_;
|
||||||
|
return false;  // no restart range was found for this target
|
||||||
|
}
|
||||||
|
|
||||||
|
// the elements in restart_array[index : index + num_blocks]
|
||||||
|
// all share the same prefix. We'll do binary search in that small range.
|
||||||
|
auto left = restart_index->first_index;
|
||||||
|
auto right = restart_index->first_index + restart_index->num_blocks - 1;
|
||||||
|
return BinarySeek(target, left, right, index);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Iterator* Block::NewIterator(const Comparator* cmp) {
|
Iterator* Block::NewIterator(const Comparator* cmp) {
|
||||||
@ -267,8 +295,13 @@ Iterator* Block::NewIterator(const Comparator* cmp) {
|
|||||||
if (num_restarts == 0) {
|
if (num_restarts == 0) {
|
||||||
return NewEmptyIterator();
|
return NewEmptyIterator();
|
||||||
} else {
|
} else {
|
||||||
return new Iter(cmp, data_, restart_offset_, num_restarts);
|
return new Iter(cmp, data_, restart_offset_, num_restarts,
|
||||||
|
hash_index_.get());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Block::SetBlockHashIndex(BlockHashIndex* hash_index) {
|
||||||
|
hash_index_.reset(hash_index);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "rocksdb/iterator.h"
|
#include "rocksdb/iterator.h"
|
||||||
#include "rocksdb/options.h"
|
#include "rocksdb/options.h"
|
||||||
|
|
||||||
@ -17,6 +18,7 @@ namespace rocksdb {
|
|||||||
|
|
||||||
struct BlockContents;
|
struct BlockContents;
|
||||||
class Comparator;
|
class Comparator;
|
||||||
|
class BlockHashIndex;
|
||||||
|
|
||||||
class Block {
|
class Block {
|
||||||
public:
|
public:
|
||||||
@ -26,20 +28,28 @@ class Block {
|
|||||||
~Block();
|
~Block();
|
||||||
|
|
||||||
size_t size() const { return size_; }
|
size_t size() const { return size_; }
|
||||||
|
const char* data() const { return data_; }
|
||||||
bool cachable() const { return cachable_; }
|
bool cachable() const { return cachable_; }
|
||||||
|
uint32_t NumRestarts() const;
|
||||||
CompressionType compression_type() const { return compression_type_; }
|
CompressionType compression_type() const { return compression_type_; }
|
||||||
|
|
||||||
|
// If hash index lookup is enabled and `use_hash_index` is true, this block
|
||||||
|
// will do hash lookup for the key prefix.
|
||||||
|
//
|
||||||
|
// NOTE: for the hash based lookup, if a key prefix doesn't match any key,
|
||||||
|
// the iterator will simply be set as "invalid", rather than returning
|
||||||
|
// the key that is just past the target key.
|
||||||
Iterator* NewIterator(const Comparator* comparator);
|
Iterator* NewIterator(const Comparator* comparator);
|
||||||
const char* data() { return data_; }
|
void SetBlockHashIndex(BlockHashIndex* hash_index);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint32_t NumRestarts() const;
|
|
||||||
|
|
||||||
const char* data_;
|
const char* data_;
|
||||||
size_t size_;
|
size_t size_;
|
||||||
uint32_t restart_offset_; // Offset in data_ of restart array
|
uint32_t restart_offset_; // Offset in data_ of restart array
|
||||||
bool owned_; // Block owns data_[]
|
bool owned_; // Block owns data_[]
|
||||||
bool cachable_;
|
bool cachable_;
|
||||||
CompressionType compression_type_;
|
CompressionType compression_type_;
|
||||||
|
std::unique_ptr<BlockHashIndex> hash_index_;
|
||||||
|
|
||||||
// No copying allowed
|
// No copying allowed
|
||||||
Block(const Block&);
|
Block(const Block&);
|
||||||
|
@ -97,9 +97,9 @@ class IndexBuilder {
|
|||||||
// 2. Shorten the key length for index block. Other than honestly using the
|
// 2. Shorten the key length for index block. Other than honestly using the
|
||||||
// last key in the data block as the index key, we instead find a shortest
|
// last key in the data block as the index key, we instead find a shortest
|
||||||
// substitute key that serves the same function.
|
// substitute key that serves the same function.
|
||||||
class BinarySearchIndexBuilder : public IndexBuilder {
|
class ShortenedIndexBuilder : public IndexBuilder {
|
||||||
public:
|
public:
|
||||||
explicit BinarySearchIndexBuilder(const Comparator* comparator)
|
explicit ShortenedIndexBuilder(const Comparator* comparator)
|
||||||
: IndexBuilder(comparator),
|
: IndexBuilder(comparator),
|
||||||
index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
|
index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
|
||||||
|
|
||||||
@ -128,11 +128,41 @@ class BinarySearchIndexBuilder : public IndexBuilder {
|
|||||||
BlockBuilder index_block_builder_;
|
BlockBuilder index_block_builder_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// FullKeyIndexBuilder is also based on BlockBuilder. It works pretty much like
|
||||||
|
// ShortenedIndexBuilder, but preserves the full key instead of a substitute
|
||||||
|
// key, because the hash index is based on "prefix".
|
||||||
|
class FullKeyIndexBuilder : public IndexBuilder {
|
||||||
|
public:
|
||||||
|
explicit FullKeyIndexBuilder(const Comparator* comparator)
|
||||||
|
: IndexBuilder(comparator),
|
||||||
|
index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
|
||||||
|
|
||||||
|
virtual void AddEntry(std::string* last_key_in_current_block,
|
||||||
|
const Slice* first_key_in_next_block,
|
||||||
|
const BlockHandle& block_handle) override {
|
||||||
|
std::string handle_encoding;
|
||||||
|
block_handle.EncodeTo(&handle_encoding);
|
||||||
|
index_block_builder_.Add(*last_key_in_current_block, handle_encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual Slice Finish() override { return index_block_builder_.Finish(); }
|
||||||
|
|
||||||
|
virtual size_t EstimatedSize() const {
|
||||||
|
return index_block_builder_.CurrentSizeEstimate();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
BlockBuilder index_block_builder_;
|
||||||
|
};
|
||||||
|
|
||||||
// Create a index builder based on its type.
|
// Create a index builder based on its type.
|
||||||
IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator) {
|
IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case BlockBasedTableOptions::kBinarySearch: {
|
case BlockBasedTableOptions::kBinarySearch: {
|
||||||
return new BinarySearchIndexBuilder(comparator);
|
return new ShortenedIndexBuilder(comparator);
|
||||||
|
}
|
||||||
|
case BlockBasedTableOptions::kHashSearch: {
|
||||||
|
return new FullKeyIndexBuilder(comparator);
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
assert(!"Do not recognize the index type ");
|
assert(!"Do not recognize the index type ");
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
|
|
||||||
#include "table/block.h"
|
#include "table/block.h"
|
||||||
#include "table/filter_block.h"
|
#include "table/filter_block.h"
|
||||||
|
#include "table/block_hash_index.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
#include "table/meta_blocks.h"
|
#include "table/meta_blocks.h"
|
||||||
#include "table/two_level_iterator.h"
|
#include "table/two_level_iterator.h"
|
||||||
@ -180,19 +181,51 @@ class BinarySearchIndexReader : public IndexReader {
|
|||||||
std::unique_ptr<Block> index_block_;
|
std::unique_ptr<Block> index_block_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO(kailiu) This class is only a stub for now. And the comment below is also
|
|
||||||
// not completed.
|
|
||||||
// Index that leverages an internal hash table to quicken the lookup for a given
|
// Index that leverages an internal hash table to quicken the lookup for a given
|
||||||
// key.
|
// key.
|
||||||
|
// @param data_iter_gen, equivalent to BlockBasedTable::NewIterator(). But that
|
||||||
|
// function requires the index to be initialized. To avoid this problem, the
|
||||||
|
// external caller passes a function that can create an iterator over the
|
||||||
|
// entries without requiring the table to be fully initialized.
|
||||||
class HashIndexReader : public IndexReader {
|
class HashIndexReader : public IndexReader {
|
||||||
public:
|
public:
|
||||||
static Status Create(RandomAccessFile* file, const BlockHandle& index_handle,
|
static Status Create(RandomAccessFile* file, const BlockHandle& index_handle,
|
||||||
Env* env, const Comparator* comparator,
|
Env* env, const Comparator* comparator,
|
||||||
BlockBasedTable* table,
|
std::function<Iterator*(Iterator*)> data_iter_gen,
|
||||||
const SliceTransform* prefix_extractor,
|
const SliceTransform* prefix_extractor,
|
||||||
IndexReader** index_reader) {
|
IndexReader** index_reader) {
|
||||||
return Status::NotSupported("not implemented yet!");
|
assert(prefix_extractor);
|
||||||
|
Block* index_block = nullptr;
|
||||||
|
auto s =
|
||||||
|
ReadBlockFromFile(file, ReadOptions(), index_handle, &index_block, env);
|
||||||
|
|
||||||
|
if (!s.ok()) {
|
||||||
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
*index_reader = new HashIndexReader(comparator, index_block);
|
||||||
|
std::unique_ptr<Iterator> index_iter(index_block->NewIterator(nullptr));
|
||||||
|
std::unique_ptr<Iterator> data_iter(
|
||||||
|
data_iter_gen(index_block->NewIterator(nullptr)));
|
||||||
|
auto hash_index = CreateBlockHashIndex(index_iter.get(), data_iter.get(),
|
||||||
|
index_block->NumRestarts(),
|
||||||
|
comparator, prefix_extractor);
|
||||||
|
index_block->SetBlockHashIndex(hash_index);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual Iterator* NewIterator() override {
|
||||||
|
return index_block_->NewIterator(comparator_);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual size_t size() const override { return index_block_->size(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
HashIndexReader(const Comparator* comparator, Block* index_block)
|
||||||
|
: IndexReader(comparator), index_block_(index_block) {
|
||||||
|
assert(index_block_ != nullptr);
|
||||||
|
}
|
||||||
|
std::unique_ptr<Block> index_block_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -223,6 +256,11 @@ struct BlockBasedTable::Rep {
|
|||||||
|
|
||||||
std::shared_ptr<const TableProperties> table_properties;
|
std::shared_ptr<const TableProperties> table_properties;
|
||||||
BlockBasedTableOptions::IndexType index_type;
|
BlockBasedTableOptions::IndexType index_type;
|
||||||
|
// TODO(kailiu) It is very ugly to use internal key in table, since table
|
||||||
|
// module should not be relying on db module. However to make things easier
|
||||||
|
// and compatible with existing code, we introduce a wrapper that allows
|
||||||
|
// block to extract prefix without knowing if a key is internal or not.
|
||||||
|
unique_ptr<SliceTransform> internal_prefix_transform;
|
||||||
};
|
};
|
||||||
|
|
||||||
BlockBasedTable::~BlockBasedTable() {
|
BlockBasedTable::~BlockBasedTable() {
|
||||||
@ -747,8 +785,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
return { filter, cache_handle };
|
return { filter, cache_handle };
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options)
|
Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
|
||||||
const {
|
|
||||||
// index reader has already been pre-populated.
|
// index reader has already been pre-populated.
|
||||||
if (rep_->index_reader) {
|
if (rep_->index_reader) {
|
||||||
return rep_->index_reader->NewIterator();
|
return rep_->index_reader->NewIterator();
|
||||||
@ -978,7 +1015,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
|
|||||||
// 3. options
|
// 3. options
|
||||||
// 4. internal_comparator
|
// 4. internal_comparator
|
||||||
// 5. index_type
|
// 5. index_type
|
||||||
Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const {
|
Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) {
|
||||||
// Some old version of block-based tables don't have index type present in
|
// Some old version of block-based tables don't have index type present in
|
||||||
// table properties. If that's the case we can safely use the kBinarySearch.
|
// table properties. If that's the case we can safely use the kBinarySearch.
|
||||||
auto index_type = BlockBasedTableOptions::kBinarySearch;
|
auto index_type = BlockBasedTableOptions::kBinarySearch;
|
||||||
@ -989,11 +1026,30 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const {
|
|||||||
DecodeFixed32(pos->second.c_str()));
|
DecodeFixed32(pos->second.c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto file = rep_->file.get();
|
||||||
|
const auto& index_handle = rep_->index_handle;
|
||||||
|
auto env = rep_->options.env;
|
||||||
|
auto comparator = &rep_->internal_comparator;
|
||||||
|
|
||||||
switch (index_type) {
|
switch (index_type) {
|
||||||
case BlockBasedTableOptions::kBinarySearch: {
|
case BlockBasedTableOptions::kBinarySearch: {
|
||||||
return BinarySearchIndexReader::Create(
|
return BinarySearchIndexReader::Create(file, index_handle, env,
|
||||||
rep_->file.get(), rep_->index_handle, rep_->options.env,
|
comparator, index_reader);
|
||||||
&rep_->internal_comparator, index_reader);
|
}
|
||||||
|
case BlockBasedTableOptions::kHashSearch: {
|
||||||
|
// We need to wrap data with internal_prefix_transform to make sure it can
|
||||||
|
// handle prefix correctly.
|
||||||
|
rep_->internal_prefix_transform.reset(
|
||||||
|
new InternalKeySliceTransform(rep_->options.prefix_extractor.get()));
|
||||||
|
return HashIndexReader::Create(
|
||||||
|
file, index_handle, env, comparator,
|
||||||
|
[&](Iterator* index_iter) {
|
||||||
|
return NewTwoLevelIterator(
|
||||||
|
index_iter, &BlockBasedTable::DataBlockReader,
|
||||||
|
const_cast<BlockBasedTable*>(this), ReadOptions(),
|
||||||
|
rep_->soptions, rep_->internal_comparator);
|
||||||
|
},
|
||||||
|
rep_->internal_prefix_transform.get(), index_reader);
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
std::string error_message =
|
std::string error_message =
|
||||||
|
@ -131,7 +131,7 @@ class BlockBasedTable : public TableReader {
|
|||||||
// 2. index is not present in block cache.
|
// 2. index is not present in block cache.
|
||||||
// 3. We disallowed any io to be performed, that is, read_options ==
|
// 3. We disallowed any io to be performed, that is, read_options ==
|
||||||
// kBlockCacheTier
|
// kBlockCacheTier
|
||||||
Iterator* NewIndexIterator(const ReadOptions& read_options) const;
|
Iterator* NewIndexIterator(const ReadOptions& read_options);
|
||||||
|
|
||||||
// Read block cache from block caches (if set): block_cache and
|
// Read block cache from block caches (if set): block_cache and
|
||||||
// block_cache_compressed.
|
// block_cache_compressed.
|
||||||
@ -164,7 +164,7 @@ class BlockBasedTable : public TableReader {
|
|||||||
|
|
||||||
void ReadMeta(const Footer& footer);
|
void ReadMeta(const Footer& footer);
|
||||||
void ReadFilter(const Slice& filter_handle_value);
|
void ReadFilter(const Slice& filter_handle_value);
|
||||||
Status CreateIndexReader(IndexReader** index_reader) const;
|
Status CreateIndexReader(IndexReader** index_reader);
|
||||||
|
|
||||||
// Read the meta block from sst.
|
// Read the meta block from sst.
|
||||||
static Status ReadMetaBlock(
|
static Status ReadMetaBlock(
|
||||||
|
@ -3,7 +3,10 @@
|
|||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
//
|
//
|
||||||
|
#include <stdio.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "db/memtable.h"
|
#include "db/memtable.h"
|
||||||
#include "db/write_batch_internal.h"
|
#include "db/write_batch_internal.h"
|
||||||
@ -11,9 +14,11 @@
|
|||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
#include "rocksdb/iterator.h"
|
#include "rocksdb/iterator.h"
|
||||||
#include "rocksdb/table.h"
|
#include "rocksdb/table.h"
|
||||||
|
#include "rocksdb/slice_transform.h"
|
||||||
#include "table/block.h"
|
#include "table/block.h"
|
||||||
#include "table/block_builder.h"
|
#include "table/block_builder.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
|
#include "table/block_hash_index.h"
|
||||||
#include "util/random.h"
|
#include "util/random.h"
|
||||||
#include "util/testharness.h"
|
#include "util/testharness.h"
|
||||||
#include "util/testutil.h"
|
#include "util/testutil.h"
|
||||||
@ -25,6 +30,40 @@ static std::string RandomString(Random* rnd, int len) {
|
|||||||
test::RandomString(rnd, len, &r);
|
test::RandomString(rnd, len, &r);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
|
||||||
|
Random *rnd) {
|
||||||
|
char buf[50];
|
||||||
|
char *p = &buf[0];
|
||||||
|
snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key);
|
||||||
|
std::string k(p);
|
||||||
|
if (padding_size) {
|
||||||
|
k += RandomString(rnd, padding_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
return k;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate random key value pairs.
|
||||||
|
// The generated keys will be sorted. You can tune the parameters to generate
|
||||||
|
// different kinds of test key/value pairs for different scenarios.
|
||||||
|
void GenerateRandomKVs(std::vector<std::string> *keys,
|
||||||
|
std::vector<std::string> *values, const int from,
|
||||||
|
const int len, const int step = 1,
|
||||||
|
const int padding_size = 0,
|
||||||
|
const int keys_share_prefix = 1) {
|
||||||
|
Random rnd(302);
|
||||||
|
|
||||||
|
// generate different prefixes
|
||||||
|
for (int i = from; i < from + len; i += step) {
|
||||||
|
// generating keys that shares the prefix
|
||||||
|
for (int j = 0; j < keys_share_prefix; ++j) {
|
||||||
|
keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
|
||||||
|
|
||||||
|
// 100 bytes values
|
||||||
|
values->emplace_back(RandomString(&rnd, 100));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class BlockTest {};
|
class BlockTest {};
|
||||||
|
|
||||||
@ -39,24 +78,11 @@ TEST(BlockTest, SimpleTest) {
|
|||||||
std::vector<std::string> values;
|
std::vector<std::string> values;
|
||||||
BlockBuilder builder(options, ic.get());
|
BlockBuilder builder(options, ic.get());
|
||||||
int num_records = 100000;
|
int num_records = 100000;
|
||||||
char buf[10];
|
|
||||||
char* p = &buf[0];
|
|
||||||
|
|
||||||
|
GenerateRandomKVs(&keys, &values, 0, num_records);
|
||||||
// add a bunch of records to a block
|
// add a bunch of records to a block
|
||||||
for (int i = 0; i < num_records; i++) {
|
for (int i = 0; i < num_records; i++) {
|
||||||
// generate random kvs
|
builder.Add(keys[i], values[i]);
|
||||||
sprintf(p, "%6d", i);
|
|
||||||
std::string k(p);
|
|
||||||
std::string v = RandomString(&rnd, 100); // 100 byte values
|
|
||||||
|
|
||||||
// write kvs to the block
|
|
||||||
Slice key(k);
|
|
||||||
Slice value(v);
|
|
||||||
builder.Add(key, value);
|
|
||||||
|
|
||||||
// remember kvs in a lookaside array
|
|
||||||
keys.push_back(k);
|
|
||||||
values.push_back(v);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// read serialized contents of the block
|
// read serialized contents of the block
|
||||||
@ -101,6 +127,114 @@ TEST(BlockTest, SimpleTest) {
|
|||||||
delete iter;
|
delete iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// return the block contents
|
||||||
|
BlockContents GetBlockContents(std::unique_ptr<BlockBuilder> *builder,
|
||||||
|
const std::vector<std::string> &keys,
|
||||||
|
const std::vector<std::string> &values,
|
||||||
|
const int prefix_group_size = 1) {
|
||||||
|
builder->reset(
|
||||||
|
new BlockBuilder(1 /* restart interval */, BytewiseComparator()));
|
||||||
|
|
||||||
|
// Add only half of the keys
|
||||||
|
for (size_t i = 0; i < keys.size(); ++i) {
|
||||||
|
(*builder)->Add(keys[i], values[i]);
|
||||||
|
}
|
||||||
|
Slice rawblock = (*builder)->Finish();
|
||||||
|
|
||||||
|
BlockContents contents;
|
||||||
|
contents.data = rawblock;
|
||||||
|
contents.cachable = false;
|
||||||
|
contents.heap_allocated = false;
|
||||||
|
|
||||||
|
return contents;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks Seek() behaviour on a block with and without a hash index.
//
// Two readers are created over the same contents. A hash index built with a
// fixed 6-byte prefix extractor (the width GenerateKey gives the primary id)
// is attached to the first reader only. The function then verifies that:
//   * seeking every entry of keys through the hash-indexed reader succeeds
//     and yields the matching entry of values;
//   * seeking keys built from the odd primary ids in [1, max_key - 1) leaves
//     the hash-indexed iterator invalid while the plain iterator stays valid.
// The second check relies on those odd ids being absent from the block; the
// callers in this file generate keys with even primary ids only.
void CheckBlockContents(BlockContents contents, const int max_key,
                        const std::vector<std::string> &keys,
                        const std::vector<std::string> &values) {
  const size_t prefix_size = 6;

  // One reader gets the hash index, the other is left untouched.
  Block hash_reader(contents);
  Block plain_reader(contents);

  std::unique_ptr<const SliceTransform> prefix_extractor(
      NewFixedPrefixTransform(prefix_size));

  {
    // Temporary iterators handed to the index builder; they are released as
    // soon as the index has been attached.
    auto iter_a = hash_reader.NewIterator(nullptr);
    auto iter_b = hash_reader.NewIterator(nullptr);
    hash_reader.SetBlockHashIndex(
        CreateBlockHashIndex(iter_a, iter_b, keys.size(), BytewiseComparator(),
                             prefix_extractor.get()));

    delete iter_a;
    delete iter_b;
  }

  std::unique_ptr<Iterator> hash_iter(
      hash_reader.NewIterator(BytewiseComparator()));

  std::unique_ptr<Iterator> regular_iter(
      plain_reader.NewIterator(BytewiseComparator()));

  // Every stored key must be found, with the value it was stored with.
  const size_t num_keys = keys.size();
  for (size_t idx = 0; idx != num_keys; ++idx) {
    hash_iter->Seek(keys[idx]);
    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(hash_iter->Valid());

    Slice found = hash_iter->value();
    ASSERT_EQ(found.ToString().compare(values[idx]), 0);
  }

  // Keys that were never stored. With the hash index, a prefix that matches
  // no key simply leaves the iterator invalid, whereas the binary-search
  // based iterator positions itself on the closest entry.
  for (int missing_id = 1; missing_id < max_key - 1; missing_id += 2) {
    auto missing_key = GenerateKey(missing_id, 0, 0, nullptr);

    hash_iter->Seek(missing_key);
    ASSERT_TRUE(!hash_iter->Valid());

    regular_iter->Seek(missing_key);
    ASSERT_TRUE(regular_iter->Valid());
  }
}
|
||||||
|
|
||||||
|
// In this test case, no two key share same prefix.
|
||||||
|
TEST(BlockTest, SimpleIndexHash) {
|
||||||
|
const int kMaxKey = 100000;
|
||||||
|
std::vector<std::string> keys;
|
||||||
|
std::vector<std::string> values;
|
||||||
|
GenerateRandomKVs(&keys, &values, 0 /* first key id */,
|
||||||
|
kMaxKey /* last key id */, 2 /* step */,
|
||||||
|
8 /* padding size (8 bytes randomly generated suffix) */);
|
||||||
|
|
||||||
|
std::unique_ptr<BlockBuilder> builder;
|
||||||
|
auto contents = GetBlockContents(&builder, keys, values);
|
||||||
|
|
||||||
|
CheckBlockContents(contents, kMaxKey, keys, values);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hash-index test in which several keys share each primary-id prefix.
TEST(BlockTest, IndexHashWithSharedPrefix) {
  const int kMaxKey = 100000;
  // Number of keys generated for every prefix.
  const int kPrefixGroup = 5;
  const int kFirstKeyId = 0;
  const int kStep = 2;
  const int kPaddingSize = 10;

  std::vector<std::string> keys;
  std::vector<std::string> values;
  GenerateRandomKVs(&keys, &values, kFirstKeyId, kMaxKey, kStep, kPaddingSize,
                    kPrefixGroup);

  // The builder is kept alive in this scope while the contents are checked.
  std::unique_ptr<BlockBuilder> builder;
  auto contents = GetBlockContents(&builder, keys, values, kPrefixGroup);

  CheckBlockContents(contents, kMaxKey, keys, values);
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
@ -1055,6 +1055,116 @@ static std::string RandomString(Random* rnd, int len) {
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adds one entry with value "v" to the table under construction.
//
// The user key is `prefix` followed by a random string of `suffix_len` bytes;
// it is wrapped in an InternalKey with sequence number 0 and type kTypeValue
// before being handed to the constructor.
//
// The random generator is a function-local static with a fixed seed, so
// successive calls continue the same pseudo-random sequence rather than
// restarting it.
//
// c:          table constructor that receives the entry.
// prefix:     leading bytes of the user key.
// suffix_len: number of random bytes appended to the prefix (default 800).
void AddInternalKey(TableConstructor* c, const std::string& prefix,
                    int suffix_len = 800) {
  static Random rnd(1023);
  // Honour the caller-supplied suffix length instead of a fixed 800.
  InternalKey k(prefix + RandomString(&rnd, suffix_len), 0, kTypeValue);
  c->Add(k.Encode().ToString(), "v");
}
|
||||||
|
|
||||||
|
// End-to-end check of a block-based table built with the hash-search index
// type and a fixed 3-byte prefix extractor. Ten keys spread over five
// distinct 3-byte prefixes are stored, then the table iterator is exercised
// with seeks to: bare prefixes, every stored key, keys past the end of each
// prefix, and prefixes that match nothing.
TEST(TableTest, HashIndexTest) {
  TableConstructor c(BytewiseComparator());

  // Keys are listed in pairs. Each key carries a large random suffix and the
  // block size below is 1700 bytes; the test asserts that this produces five
  // data blocks.
  const char* const kKeyHeads[] = {"0015", "0035",   //
                                   "0054", "0055",   //
                                   "0056", "0057",   //
                                   "0058", "0075",   //
                                   "0076", "0095"};
  for (const char* head : kKeyHeads) {
    AddInternalKey(&c, head);
  }

  std::vector<std::string> keys;
  KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_options;
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  options.block_cache = NewLRUCache(1024);
  options.block_size = 1700;

  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  c.Finish(options, *comparator, &keys, &kvmap);
  auto reader = c.table_reader();

  auto props = c.table_reader()->GetTableProperties();
  ASSERT_EQ(5u, props->num_data_blocks);

  std::unique_ptr<Iterator> hash_iter(reader->NewIterator(ReadOptions()));

  // Prefixes that exist in the table, with the first stored key of each.
  std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
  std::vector<std::string> lower_bound = {keys[0], keys[1], keys[2],
                                          keys[7], keys[9], };

  // Seeking a bare prefix must land on the first key carrying that prefix.
  for (size_t p = 0; p < prefixes.size(); ++p) {
    hash_iter->Seek(InternalKey(prefixes[p], 0, kTypeValue).Encode());
    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(hash_iter->Valid());

    ASSERT_EQ(lower_bound[p], hash_iter->key().ToString());
    ASSERT_EQ("v", hash_iter->value().ToString());
  }

  // First stored key following each prefix; the last prefix has none.
  std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };

  // Seeking each stored key (by its user-key part) must find that entry.
  for (const auto& entry : kvmap) {
    auto ukey = ExtractUserKey(entry.first).ToString();
    hash_iter->Seek(ukey);

    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(hash_iter->Valid());

    ASSERT_EQ(entry.first, hash_iter->key().ToString());
    ASSERT_EQ(entry.second, hash_iter->value().ToString());
  }

  // Seeking <prefix>9 goes past every stored key with that prefix, so the
  // iterator must land on the first key of the next prefix, or become
  // invalid when there is no next prefix.
  for (size_t p = 0; p < prefixes.size(); ++p) {
    auto past_prefix = prefixes[p] + "9";
    hash_iter->Seek(InternalKey(past_prefix, 0, kTypeValue).Encode());

    ASSERT_OK(hash_iter->status());
    if (p + 1 < prefixes.size()) {
      ASSERT_TRUE(hash_iter->Valid());
      ASSERT_EQ(upper_bound[p], hash_iter->key().ToString());
      ASSERT_EQ("v", hash_iter->value().ToString());
    } else {
      // Nothing is stored after the last prefix.
      ASSERT_TRUE(!hash_iter->Valid());
    }
  }

  // Prefixes that match no stored key must leave the iterator invalid.
  std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
  for (const auto& absent : non_exist_prefixes) {
    hash_iter->Seek(InternalKey(absent, 0, kTypeValue).Encode());

    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(!hash_iter->Valid());
  }
}
|
||||||
|
|
||||||
// It's very hard to figure out the index block size of a block accurately.
|
// It's very hard to figure out the index block size of a block accurately.
|
||||||
// To make sure we get the index size, we just make sure as key number
|
// To make sure we get the index size, we just make sure as key number
|
||||||
// grows, the filter block size also grows.
|
// grows, the filter block size also grows.
|
||||||
|
Loading…
Reference in New Issue
Block a user