From 75b59d5146b078d67968b30d28bf0657574ec968 Mon Sep 17 00:00:00 2001 From: Kai Liu Date: Thu, 10 Apr 2014 14:19:43 -0700 Subject: [PATCH] Enable hash index for block-based table Summary: Based on previous patches, this diff eventually provides the end-to-end mechanism for users to specify the hash-index. Test Plan: Wrote several new unit tests. Reviewers: sdong, haobo, dhruba Reviewed By: sdong CC: leveldb Differential Revision: https://reviews.facebook.net/D16539 --- db/db_test.cc | 42 +++++++- db/dbformat.h | 31 ++++++ include/rocksdb/table.h | 6 ++ table/block.cc | 113 +++++++++++++------- table/block.h | 18 +++- table/block_based_table_builder.cc | 36 ++++++- table/block_based_table_reader.cc | 76 +++++++++++-- table/block_based_table_reader.h | 4 +- table/block_test.cc | 164 ++++++++++++++++++++++++++--- table/table_test.cc | 110 +++++++++++++++++++ 10 files changed, 521 insertions(+), 79 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index 0285905f2..1ec56d712 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -266,6 +266,8 @@ class DBTest { // Sequence of option configurations to try enum OptionConfig { kDefault, + kBlockBasedTableWithPrefixHashIndex, + kBlockBasedTableWithWholeKeyHashIndex, kPlainTableFirstBytePrefix, kPlainTableAllBytesPrefix, kVectorRep, @@ -303,7 +305,8 @@ class DBTest { kSkipDeletesFilterFirst = 1, kSkipUniversalCompaction = 2, kSkipMergePut = 4, - kSkipPlainTable = 8 + kSkipPlainTable = 8, + kSkipHashIndex = 16 }; DBTest() : option_config_(kDefault), @@ -343,6 +346,12 @@ class DBTest { || option_config_ == kPlainTableFirstBytePrefix)) { continue; } + if ((skip_mask & kSkipPlainTable) && + (option_config_ == kBlockBasedTableWithPrefixHashIndex || + option_config_ == kBlockBasedTableWithWholeKeyHashIndex)) { + continue; + } + break; } @@ -439,6 +448,20 @@ class DBTest { case kInfiniteMaxOpenFiles: options.max_open_files = -1; break; + case kBlockBasedTableWithPrefixHashIndex: { + BlockBasedTableOptions table_options; + 
table_options.index_type = BlockBasedTableOptions::kHashSearch; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.prefix_extractor.reset(NewFixedPrefixTransform(1)); + break; + } + case kBlockBasedTableWithWholeKeyHashIndex: { + BlockBasedTableOptions table_options; + table_options.index_type = BlockBasedTableOptions::kHashSearch; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.prefix_extractor.reset(NewNoopTransform()); + break; + } default: break; } @@ -1363,7 +1386,7 @@ TEST(DBTest, KeyMayExist) { // KeyMayExist function only checks data in block caches, which is not used // by plain table format. - } while (ChangeOptions(kSkipPlainTable)); + } while (ChangeOptions(kSkipPlainTable | kSkipHashIndex)); } TEST(DBTest, NonBlockingIteration) { @@ -6184,7 +6207,9 @@ TEST(DBTest, Randomized) { int minimum = 0; if (option_config_ == kHashSkipList || option_config_ == kHashLinkList || - option_config_ == kPlainTableFirstBytePrefix) { + option_config_ == kPlainTableFirstBytePrefix || + option_config_ == kBlockBasedTableWithWholeKeyHashIndex || + option_config_ == kBlockBasedTableWithPrefixHashIndex) { minimum = 1; } if (p < 45) { // Put @@ -6224,8 +6249,15 @@ TEST(DBTest, Randomized) { } if ((step % 100) == 0) { - ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); - ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); + // For DB instances that use the hash index + block-based table, the + // iterator will be invalid right away when seeking a non-existent key, rather + // than return a key that is close to it. 
+ if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex && + option_config_ != kBlockBasedTableWithPrefixHashIndex) { + ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr)); + ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); + } + // Save a snapshot from each DB this time that we'll use next // time we compare things, to make sure the current state is // preserved with the snapshot diff --git a/db/dbformat.h b/db/dbformat.h index 99925d284..27a082284 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -13,6 +13,7 @@ #include "rocksdb/db.h" #include "rocksdb/filter_policy.h" #include "rocksdb/slice.h" +#include "rocksdb/slice_transform.h" #include "rocksdb/table.h" #include "rocksdb/types.h" #include "util/coding.h" @@ -304,4 +305,34 @@ class IterKey { void operator=(const IterKey&) = delete; }; +class InternalKeySliceTransform : public SliceTransform { + public: + explicit InternalKeySliceTransform(const SliceTransform* transform) + : transform_(transform) {} + + virtual const char* Name() const { return transform_->Name(); } + + virtual Slice Transform(const Slice& src) const { + auto user_key = ExtractUserKey(src); + return transform_->Transform(user_key); + } + + virtual bool InDomain(const Slice& src) const { + auto user_key = ExtractUserKey(src); + return transform_->InDomain(user_key); + } + + virtual bool InRange(const Slice& dst) const { + auto user_key = ExtractUserKey(dst); + return transform_->InRange(user_key); + } + + const SliceTransform* user_prefix_extractor() const { return transform_; } + + private: + // Like comparator, InternalKeySliceTransform will not take care of the + // deletion of transform_ + const SliceTransform* const transform_; +}; + } // namespace rocksdb diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index e350c7780..1016bcf14 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -60,6 +60,12 @@ struct BlockBasedTableOptions { // A space efficient index block 
that is optimized for // binary-search-based index. kBinarySearch, + + // The hash index, if enabled, will do the hash lookup when + // `ReadOption.prefix_seek == true`. User should also specify + // `Options.prefix_extractor` to allow the index block to correctly + // extract the prefix of the given key and perform hash table lookup. + kHashSearch, }; IndexType index_type = kBinarySearch; diff --git a/table/block.cc b/table/block.cc index 3f969fe2a..6a6751ca7 100644 --- a/table/block.cc +++ b/table/block.cc @@ -11,16 +11,20 @@ #include "table/block.h" -#include #include +#include +#include +#include + #include "rocksdb/comparator.h" +#include "table/block_hash_index.h" #include "table/format.h" #include "util/coding.h" #include "util/logging.h" namespace rocksdb { -inline uint32_t Block::NumRestarts() const { +uint32_t Block::NumRestarts() const { assert(size_ >= 2*sizeof(uint32_t)); return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); } @@ -92,6 +96,7 @@ class Block::Iter : public Iterator { std::string key_; Slice value_; Status status_; + BlockHashIndex* hash_index_; inline int Compare(const Slice& a, const Slice& b) const { return comparator_->Compare(a, b); @@ -118,16 +123,15 @@ class Block::Iter : public Iterator { } public: - Iter(const Comparator* comparator, - const char* data, - uint32_t restarts, - uint32_t num_restarts) + Iter(const Comparator* comparator, const char* data, uint32_t restarts, + uint32_t num_restarts, BlockHashIndex* hash_index) : comparator_(comparator), data_(data), restarts_(restarts), num_restarts_(num_restarts), current_(restarts_), - restart_index_(num_restarts_) { + restart_index_(num_restarts_), + hash_index_(hash_index) { assert(num_restarts_ > 0); } @@ -169,45 +173,22 @@ class Block::Iter : public Iterator { } virtual void Seek(const Slice& target) { - // Binary search in restart array to find the first restart point - // with a key >= target - uint32_t left = 0; - uint32_t right = num_restarts_ - 1; - while (left < right) 
{ - uint32_t mid = (left + right + 1) / 2; - uint32_t region_offset = GetRestartPoint(mid); - uint32_t shared, non_shared, value_length; - const char* key_ptr = DecodeEntry(data_ + region_offset, - data_ + restarts_, - &shared, &non_shared, &value_length); - if (key_ptr == nullptr || (shared != 0)) { - CorruptionError(); - return; - } - Slice mid_key(key_ptr, non_shared); - if (Compare(mid_key, target) < 0) { - // Key at "mid" is smaller than "target". Therefore all - // blocks before "mid" are uninteresting. - left = mid; - } else { - // Key at "mid" is >= "target". Therefore all blocks at or - // after "mid" are uninteresting. - right = mid - 1; - } - } + uint32_t index = 0; + bool ok = hash_index_ ? HashSeek(target, &index) + : BinarySeek(target, 0, num_restarts_ - 1, &index); + if (!ok) { + return; + } + SeekToRestartPoint(index); // Linear search (within restart block) for first key >= target - SeekToRestartPoint(left); + while (true) { - if (!ParseNextKey()) { - return; - } - if (Compare(key_, target) >= 0) { + if (!ParseNextKey() || Compare(key_, target) >= 0) { return; } } } - virtual void SeekToFirst() { SeekToRestartPoint(0); ParseNextKey(); @@ -257,6 +238,53 @@ class Block::Iter : public Iterator { return true; } } + // Binary search in restart array to find the first restart point + // with a key >= target + bool BinarySeek(const Slice& target, uint32_t left, uint32_t right, + uint32_t* index) { + assert(left <= right); + + while (left < right) { + uint32_t mid = (left + right + 1) / 2; + uint32_t region_offset = GetRestartPoint(mid); + uint32_t shared, non_shared, value_length; + const char* key_ptr = + DecodeEntry(data_ + region_offset, data_ + restarts_, &shared, + &non_shared, &value_length); + if (key_ptr == nullptr || (shared != 0)) { + CorruptionError(); + return false; + } + Slice mid_key(key_ptr, non_shared); + if (Compare(mid_key, target) < 0) { + // Key at "mid" is smaller than "target". 
Therefore all + // blocks before "mid" are uninteresting. + left = mid; + } else { + // Key at "mid" is >= "target". Therefore all blocks at or + // after "mid" are uninteresting. + right = mid - 1; + } + } + + *index = left; + return true; + } + + bool HashSeek(const Slice& target, uint32_t* index) { + assert(hash_index_); + auto restart_index = hash_index_->GetRestartIndex(target); + if (restart_index == nullptr) { + current_ = restarts_; + return 0; + } + + // the elements in restart_array[index : index + num_blocks] + // are all with same prefix. We'll do binary search in that small range. + auto left = restart_index->first_index; + auto right = restart_index->first_index + restart_index->num_blocks - 1; + return BinarySeek(target, left, right, index); + } }; Iterator* Block::NewIterator(const Comparator* cmp) { @@ -267,8 +295,13 @@ Iterator* Block::NewIterator(const Comparator* cmp) { if (num_restarts == 0) { return NewEmptyIterator(); } else { - return new Iter(cmp, data_, restart_offset_, num_restarts); + return new Iter(cmp, data_, restart_offset_, num_restarts, + hash_index_.get()); } } +void Block::SetBlockHashIndex(BlockHashIndex* hash_index) { + hash_index_.reset(hash_index); +} + } // namespace rocksdb diff --git a/table/block.h b/table/block.h index 6d74bb417..b363d62fe 100644 --- a/table/block.h +++ b/table/block.h @@ -10,6 +10,7 @@ #pragma once #include #include + #include "rocksdb/iterator.h" #include "rocksdb/options.h" @@ -17,6 +18,7 @@ namespace rocksdb { struct BlockContents; class Comparator; +class BlockHashIndex; class Block { public: @@ -26,20 +28,28 @@ class Block { ~Block(); size_t size() const { return size_; } - bool cachable() const { return cachable_; } + const char* data() const { return data_; } + bool cachable() const { return cachable_; } + uint32_t NumRestarts() const; CompressionType compression_type() const { return compression_type_; } + + // If hash index lookup is enabled and `use_hash_index` is true. 
This block + // will do hash lookup for the key prefix. + // + // NOTE: for the hash based lookup, if a key prefix doesn't match any key, + // the iterator will simply be set as "invalid", rather than returning + // the key that is just past the target key. Iterator* NewIterator(const Comparator* comparator); - const char* data() { return data_; } + void SetBlockHashIndex(BlockHashIndex* hash_index); private: - uint32_t NumRestarts() const; - const char* data_; size_t size_; uint32_t restart_offset_; // Offset in data_ of restart array bool owned_; // Block owns data_[] bool cachable_; CompressionType compression_type_; + std::unique_ptr hash_index_; // No copying allowed Block(const Block&); diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index de2466605..6b48bf0e6 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -97,9 +97,9 @@ class IndexBuilder { // 2. Shorten the key length for index block. Other than honestly using the // last key in the data block as the index key, we instead find a shortest // substitute key that serves the same function. -class BinarySearchIndexBuilder : public IndexBuilder { +class ShortenedIndexBuilder : public IndexBuilder { public: - explicit BinarySearchIndexBuilder(const Comparator* comparator) + explicit ShortenedIndexBuilder(const Comparator* comparator) : IndexBuilder(comparator), index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {} @@ -128,11 +128,41 @@ class BinarySearchIndexBuilder : public IndexBuilder { BlockBuilder index_block_builder_; }; +// FullKeyIndexBuilder is also based on BlockBuilder. It works pretty much like +// ShortenedIndexBuilder, but preserves the full key instead of the substitute key, +// with the reason being that hash index is based on "prefix". 
+class FullKeyIndexBuilder : public IndexBuilder { + public: + explicit FullKeyIndexBuilder(const Comparator* comparator) + : IndexBuilder(comparator), + index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {} + + virtual void AddEntry(std::string* last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle) override { + std::string handle_encoding; + block_handle.EncodeTo(&handle_encoding); + index_block_builder_.Add(*last_key_in_current_block, handle_encoding); + } + + virtual Slice Finish() override { return index_block_builder_.Finish(); } + + virtual size_t EstimatedSize() const { + return index_block_builder_.CurrentSizeEstimate(); + } + + private: + BlockBuilder index_block_builder_; +}; + // Create a index builder based on its type. IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator) { switch (type) { case BlockBasedTableOptions::kBinarySearch: { - return new BinarySearchIndexBuilder(comparator); + return new ShortenedIndexBuilder(comparator); + } + case BlockBasedTableOptions::kHashSearch: { + return new FullKeyIndexBuilder(comparator); } default: { assert(!"Do not recognize the index type "); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index c1555747a..f686239cb 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -25,6 +25,7 @@ #include "table/block.h" #include "table/filter_block.h" +#include "table/block_hash_index.h" #include "table/format.h" #include "table/meta_blocks.h" #include "table/two_level_iterator.h" @@ -180,19 +181,51 @@ class BinarySearchIndexReader : public IndexReader { std::unique_ptr index_block_; }; -// TODO(kailiu) This class is only a stub for now. And the comment below is also -// not completed. // Index that leverages an internal hash table to quicken the lookup for a given // key. +// @param data_iter_gen, equavalent to BlockBasedTable::NewIterator(). 
But that +// functions requires index to be initalized. To avoid this problem external +// caller will pass a function that can create the iterator over the entries +// without the table to be fully initialized. class HashIndexReader : public IndexReader { public: static Status Create(RandomAccessFile* file, const BlockHandle& index_handle, Env* env, const Comparator* comparator, - BlockBasedTable* table, + std::function data_iter_gen, const SliceTransform* prefix_extractor, IndexReader** index_reader) { - return Status::NotSupported("not implemented yet!"); + assert(prefix_extractor); + Block* index_block = nullptr; + auto s = + ReadBlockFromFile(file, ReadOptions(), index_handle, &index_block, env); + + if (!s.ok()) { + return s; + } + + *index_reader = new HashIndexReader(comparator, index_block); + std::unique_ptr index_iter(index_block->NewIterator(nullptr)); + std::unique_ptr data_iter( + data_iter_gen(index_block->NewIterator(nullptr))); + auto hash_index = CreateBlockHashIndex(index_iter.get(), data_iter.get(), + index_block->NumRestarts(), + comparator, prefix_extractor); + index_block->SetBlockHashIndex(hash_index); + return s; } + + virtual Iterator* NewIterator() override { + return index_block_->NewIterator(comparator_); + } + + virtual size_t size() const override { return index_block_->size(); } + + private: + HashIndexReader(const Comparator* comparator, Block* index_block) + : IndexReader(comparator), index_block_(index_block) { + assert(index_block_ != nullptr); + } + std::unique_ptr index_block_; }; @@ -223,6 +256,11 @@ struct BlockBasedTable::Rep { std::shared_ptr table_properties; BlockBasedTableOptions::IndexType index_type; + // TODO(kailiu) It is very ugly to use internal key in table, since table + // module should not be relying on db module. However to make things easier + // and compatible with existing code, we introduce a wrapper that allows + // block to extract prefix without knowing if a key is internal or not. 
+ unique_ptr internal_prefix_transform; }; BlockBasedTable::~BlockBasedTable() { @@ -747,8 +785,7 @@ BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( return { filter, cache_handle }; } -Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) - const { +Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) { // index reader has already been pre-populated. if (rep_->index_reader) { return rep_->index_reader->NewIterator(); @@ -978,7 +1015,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, // 3. options // 4. internal_comparator // 5. index_type -Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const { +Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) { // Some old version of block-based tables don't have index type present in // table properties. If that's the case we can safely use the kBinarySearch. auto index_type = BlockBasedTableOptions::kBinarySearch; @@ -989,11 +1026,30 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const { DecodeFixed32(pos->second.c_str())); } + auto file = rep_->file.get(); + const auto& index_handle = rep_->index_handle; + auto env = rep_->options.env; + auto comparator = &rep_->internal_comparator; + switch (index_type) { case BlockBasedTableOptions::kBinarySearch: { - return BinarySearchIndexReader::Create( - rep_->file.get(), rep_->index_handle, rep_->options.env, - &rep_->internal_comparator, index_reader); + return BinarySearchIndexReader::Create(file, index_handle, env, + comparator, index_reader); + } + case BlockBasedTableOptions::kHashSearch: { + // We need to wrap data with internal_prefix_transform to make sure it can + // handle prefix correctly. 
+ rep_->internal_prefix_transform.reset( + new InternalKeySliceTransform(rep_->options.prefix_extractor.get())); + return HashIndexReader::Create( + file, index_handle, env, comparator, + [&](Iterator* index_iter) { + return NewTwoLevelIterator( + index_iter, &BlockBasedTable::DataBlockReader, + const_cast(this), ReadOptions(), + rep_->soptions, rep_->internal_comparator); + }, + rep_->internal_prefix_transform.get(), index_reader); } default: { std::string error_message = diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 8b8f09bd3..613460634 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -131,7 +131,7 @@ class BlockBasedTable : public TableReader { // 2. index is not present in block cache. // 3. We disallowed any io to be performed, that is, read_options == // kBlockCacheTier - Iterator* NewIndexIterator(const ReadOptions& read_options) const; + Iterator* NewIndexIterator(const ReadOptions& read_options); // Read block cache from block caches (if set): block_cache and // block_cache_compressed. @@ -164,7 +164,7 @@ class BlockBasedTable : public TableReader { void ReadMeta(const Footer& footer); void ReadFilter(const Slice& filter_handle_value); - Status CreateIndexReader(IndexReader** index_reader) const; + Status CreateIndexReader(IndexReader** index_reader); // Read the meta block from sst. static Status ReadMetaBlock( diff --git a/table/block_test.cc b/table/block_test.cc index 588ce6729..fdba8e99b 100644 --- a/table/block_test.cc +++ b/table/block_test.cc @@ -3,7 +3,10 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. 
// +#include #include + #include "db/dbformat.h" #include "db/memtable.h" #include "db/write_batch_internal.h" @@ -11,9 +14,11 @@ #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/table.h" +#include "rocksdb/slice_transform.h" #include "table/block.h" #include "table/block_builder.h" #include "table/format.h" +#include "table/block_hash_index.h" #include "util/random.h" #include "util/testharness.h" #include "util/testutil.h" @@ -25,6 +30,40 @@ static std::string RandomString(Random* rnd, int len) { test::RandomString(rnd, len, &r); return r; } +std::string GenerateKey(int primary_key, int secondary_key, int padding_size, + Random *rnd) { + char buf[50]; + char *p = &buf[0]; + snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key); + std::string k(p); + if (padding_size) { + k += RandomString(rnd, padding_size); + } + + return k; +} + +// Generate random key value pairs. +// The generated key will be sorted. You can tune the parameters to generate +// different kinds of test key/value pairs for different scenarios. 
+void GenerateRandomKVs(std::vector *keys, + std::vector *values, const int from, + const int len, const int step = 1, + const int padding_size = 0, + const int keys_share_prefix = 1) { + Random rnd(302); + + // generate different prefix + for (int i = from; i < from + len; i += step) { + // generating keys that shares the prefix + for (int j = 0; j < keys_share_prefix; ++j) { + keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); + + // 100 bytes values + values->emplace_back(RandomString(&rnd, 100)); + } + } +} class BlockTest {}; @@ -39,24 +78,11 @@ TEST(BlockTest, SimpleTest) { std::vector values; BlockBuilder builder(options, ic.get()); int num_records = 100000; - char buf[10]; - char* p = &buf[0]; + GenerateRandomKVs(&keys, &values, 0, num_records); // add a bunch of records to a block for (int i = 0; i < num_records; i++) { - // generate random kvs - sprintf(p, "%6d", i); - std::string k(p); - std::string v = RandomString(&rnd, 100); // 100 byte values - - // write kvs to the block - Slice key(k); - Slice value(v); - builder.Add(key, value); - - // remember kvs in a lookaside array - keys.push_back(k); - values.push_back(v); + builder.Add(keys[i], values[i]); } // read serialized contents of the block @@ -101,6 +127,114 @@ TEST(BlockTest, SimpleTest) { delete iter; } +// return the block contents +BlockContents GetBlockContents(std::unique_ptr *builder, + const std::vector &keys, + const std::vector &values, + const int prefix_group_size = 1) { + builder->reset( + new BlockBuilder(1 /* restart interval */, BytewiseComparator())); + + // Add only half of the keys + for (size_t i = 0; i < keys.size(); ++i) { + (*builder)->Add(keys[i], values[i]); + } + Slice rawblock = (*builder)->Finish(); + + BlockContents contents; + contents.data = rawblock; + contents.cachable = false; + contents.heap_allocated = false; + + return contents; +} + +void CheckBlockContents(BlockContents contents, const int max_key, + const std::vector &keys, + const std::vector 
&values) { + const size_t prefix_size = 6; + // create block reader + Block reader1(contents); + Block reader2(contents); + + std::unique_ptr prefix_extractor( + NewFixedPrefixTransform(prefix_size)); + + { + auto iter1 = reader1.NewIterator(nullptr); + auto iter2 = reader1.NewIterator(nullptr); + reader1.SetBlockHashIndex(CreateBlockHashIndex(iter1, iter2, keys.size(), + BytewiseComparator(), + prefix_extractor.get())); + + delete iter1; + delete iter2; + } + + std::unique_ptr hash_iter( + reader1.NewIterator(BytewiseComparator())); + + std::unique_ptr regular_iter( + reader2.NewIterator(BytewiseComparator())); + + // Seek existent keys + for (size_t i = 0; i < keys.size(); i++) { + hash_iter->Seek(keys[i]); + ASSERT_OK(hash_iter->status()); + ASSERT_TRUE(hash_iter->Valid()); + + Slice v = hash_iter->value(); + ASSERT_EQ(v.ToString().compare(values[i]), 0); + } + + // Seek non-existent keys. + // For hash index, if a key with a given prefix is not found, iterator will + // simply be set as invalid; whereas the binary search based iterator will + // return the one that is closest. + for (int i = 1; i < max_key - 1; i += 2) { + auto key = GenerateKey(i, 0, 0, nullptr); + hash_iter->Seek(key); + ASSERT_TRUE(!hash_iter->Valid()); + + regular_iter->Seek(key); + ASSERT_TRUE(regular_iter->Valid()); + } +} + +// In this test case, no two keys share the same prefix. +TEST(BlockTest, SimpleIndexHash) { + const int kMaxKey = 100000; + std::vector keys; + std::vector values; + GenerateRandomKVs(&keys, &values, 0 /* first key id */, + kMaxKey /* last key id */, 2 /* step */, + 8 /* padding size (8 bytes randomly generated suffix) */); + + std::unique_ptr builder; + auto contents = GetBlockContents(&builder, keys, values); + + CheckBlockContents(contents, kMaxKey, keys, values); +} + +TEST(BlockTest, IndexHashWithSharedPrefix) { + const int kMaxKey = 100000; + // for each prefix, there will be 5 keys that start with it. 
+ const int kPrefixGroup = 5; + std::vector keys; + std::vector values; + // Generate keys with same prefix. + GenerateRandomKVs(&keys, &values, 0, // first key id + kMaxKey, // last key id + 2, // step + 10, // padding size, + kPrefixGroup); + + std::unique_ptr builder; + auto contents = GetBlockContents(&builder, keys, values, kPrefixGroup); + + CheckBlockContents(contents, kMaxKey, keys, values); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/table/table_test.cc b/table/table_test.cc index 18ae2a3aa..0426122ff 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1055,6 +1055,116 @@ static std::string RandomString(Random* rnd, int len) { return r; } +void AddInternalKey(TableConstructor* c, const std::string prefix, + int suffix_len = 800) { + static Random rnd(1023); + InternalKey k(prefix + RandomString(&rnd, 800), 0, kTypeValue); + c->Add(k.Encode().ToString(), "v"); +} + +TEST(TableTest, HashIndexTest) { + TableConstructor c(BytewiseComparator()); + + // keys with prefix length 3, make sure the key/value is big enough to fill + // one block + AddInternalKey(&c, "0015"); + AddInternalKey(&c, "0035"); + + AddInternalKey(&c, "0054"); + AddInternalKey(&c, "0055"); + + AddInternalKey(&c, "0056"); + AddInternalKey(&c, "0057"); + + AddInternalKey(&c, "0058"); + AddInternalKey(&c, "0075"); + + AddInternalKey(&c, "0076"); + AddInternalKey(&c, "0095"); + + std::vector keys; + KVMap kvmap; + Options options; + BlockBasedTableOptions table_options; + table_options.index_type = BlockBasedTableOptions::kHashSearch; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + options.prefix_extractor.reset(NewFixedPrefixTransform(3)); + options.block_cache = NewLRUCache(1024); + options.block_size = 1700; + + std::unique_ptr comparator( + new InternalKeyComparator(BytewiseComparator())); + c.Finish(options, *comparator, &keys, &kvmap); + auto reader = c.table_reader(); + + auto props = 
c.table_reader()->GetTableProperties(); + ASSERT_EQ(5u, props->num_data_blocks); + + std::unique_ptr hash_iter(reader->NewIterator(ReadOptions())); + + // -- Find keys do not exist, but have common prefix. + std::vector prefixes = {"001", "003", "005", "007", "009"}; + std::vector lower_bound = {keys[0], keys[1], keys[2], + keys[7], keys[9], }; + + // find the lower bound of the prefix + for (size_t i = 0; i < prefixes.size(); ++i) { + hash_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode()); + ASSERT_OK(hash_iter->status()); + ASSERT_TRUE(hash_iter->Valid()); + + // seek the first element in the block + ASSERT_EQ(lower_bound[i], hash_iter->key().ToString()); + ASSERT_EQ("v", hash_iter->value().ToString()); + } + + // find the upper bound of prefixes + std::vector upper_bound = {keys[1], keys[2], keys[7], keys[9], }; + + // find existing keys + for (const auto& item : kvmap) { + auto ukey = ExtractUserKey(item.first).ToString(); + hash_iter->Seek(ukey); + + // ASSERT_OK(regular_iter->status()); + ASSERT_OK(hash_iter->status()); + + // ASSERT_TRUE(regular_iter->Valid()); + ASSERT_TRUE(hash_iter->Valid()); + + ASSERT_EQ(item.first, hash_iter->key().ToString()); + ASSERT_EQ(item.second, hash_iter->value().ToString()); + } + + for (size_t i = 0; i < prefixes.size(); ++i) { + // the key is greater than any existing keys. + auto key = prefixes[i] + "9"; + hash_iter->Seek(InternalKey(key, 0, kTypeValue).Encode()); + + ASSERT_OK(hash_iter->status()); + if (i == prefixes.size() - 1) { + // last key + ASSERT_TRUE(!hash_iter->Valid()); + } else { + ASSERT_TRUE(hash_iter->Valid()); + // seek the first element in the block + ASSERT_EQ(upper_bound[i], hash_iter->key().ToString()); + ASSERT_EQ("v", hash_iter->value().ToString()); + } + } + + // find keys with prefix that don't match any of the existing prefixes. 
+ std::vector non_exist_prefixes = {"002", "004", "006", "008"}; + for (const auto& prefix : non_exist_prefixes) { + hash_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode()); + // regular_iter->Seek(prefix); + + ASSERT_OK(hash_iter->status()); + ASSERT_TRUE(!hash_iter->Valid()); + } +} + // It's very hard to figure out the index block size of a block accurately. // To make sure we get the index size, we just make sure as key number // grows, the filter block size also grows.