Enable hash index for block-based table

Summary: Based on previous patches, this diff eventually provides the end-to-end mechanism for users to specify the hash-index. Test Plan: Wrote several new unit tests. Reviewers: sdong, haobo, dhruba Reviewed By: sdong CC: leveldb Differential Revision: https://reviews.facebook.net/D16539
2014-04-10 14:19:43 -07:00 · 2014-04-10 14:19:43 -07:00 · 75b59d5146
commit 75b59d5146
parent 7a92537fc4
10 changed files with 521 additions and 79 deletions
--- a/db/db_test.cc
+++ b/db/db_test.cc
@ -266,6 +266,8 @@ class DBTest {
  // Sequence of option configurations to try
  enum OptionConfig {
    kDefault,
    kBlockBasedTableWithPrefixHashIndex,
    kBlockBasedTableWithWholeKeyHashIndex,
    kPlainTableFirstBytePrefix,
    kPlainTableAllBytesPrefix,
    kVectorRep,
@ -303,7 +305,8 @@ class DBTest {
    kSkipDeletesFilterFirst = 1,
    kSkipUniversalCompaction = 2,
    kSkipMergePut = 4,
-    kSkipPlainTable = 8
+    kSkipPlainTable = 8,
    kSkipHashIndex = 16
  };
  DBTest() : option_config_(kDefault),
@ -343,6 +346,12 @@ class DBTest {
              || option_config_ == kPlainTableFirstBytePrefix)) {
        continue;
      }
      // Hash-index configurations are skipped only when the caller asks for
      // it through kSkipHashIndex. Plain-table skipping is handled by the
      // check above and must not affect block-based table configurations.
      if ((skip_mask & kSkipHashIndex) &&
          (option_config_ == kBlockBasedTableWithPrefixHashIndex ||
           option_config_ == kBlockBasedTableWithWholeKeyHashIndex)) {
        continue;
      }
      break;
    }
@ -439,6 +448,20 @@ class DBTest {
      case kInfiniteMaxOpenFiles:
        options.max_open_files = -1;
        break;
      case kBlockBasedTableWithPrefixHashIndex: {
        BlockBasedTableOptions table_options;
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        options.prefix_extractor.reset(NewFixedPrefixTransform(1));
        break;
      }
      case kBlockBasedTableWithWholeKeyHashIndex: {
        BlockBasedTableOptions table_options;
        table_options.index_type = BlockBasedTableOptions::kHashSearch;
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        options.prefix_extractor.reset(NewNoopTransform());
        break;
      }
      default:
        break;
    }
@ -1363,7 +1386,7 @@ TEST(DBTest, KeyMayExist) {
    // KeyMayExist function only checks data in block caches, which is not used
    // by plain table format.
-  } while (ChangeOptions(kSkipPlainTable));
+  } while (ChangeOptions(kSkipPlainTable | kSkipHashIndex));
 }
 TEST(DBTest, NonBlockingIteration) {
@ -6184,7 +6207,9 @@ TEST(DBTest, Randomized) {
      int minimum = 0;
      if (option_config_ == kHashSkipList ||
          option_config_ == kHashLinkList ||
-          option_config_ == kPlainTableFirstBytePrefix) {
+          option_config_ == kPlainTableFirstBytePrefix ||
          option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
          option_config_ == kBlockBasedTableWithPrefixHashIndex) {
        minimum = 1;
      }
      if (p < 45) {                               // Put
@ -6224,8 +6249,15 @@ TEST(DBTest, Randomized) {
      }
      if ((step % 100) == 0) {
-        ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
+        // For DB instances that use the hash index + block-based table, the
-        ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
+        // iterator will be invalid right when seeking a non-existent key, rather
        // than return a key that is close to it.
        if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
            option_config_ != kBlockBasedTableWithPrefixHashIndex) {
          ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
          ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
        }
        // Save a snapshot from each DB this time that we'll use next
        // time we compare things, to make sure the current state is
        // preserved with the snapshot
--- a/db/dbformat.h
+++ b/db/dbformat.h
@ -13,6 +13,7 @@
 #include "rocksdb/db.h"
 #include "rocksdb/filter_policy.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/slice_transform.h"
 #include "rocksdb/table.h"
 #include "rocksdb/types.h"
 #include "util/coding.h"
@ -304,4 +305,34 @@ class IterKey {
  void operator=(const IterKey&) = delete;
 };
 // Adapter around a SliceTransform: every slice handed to Transform(),
 // InDomain() or InRange() is first passed through ExtractUserKey(), and the
 // result is forwarded to the wrapped transform.
 class InternalKeySliceTransform : public SliceTransform {
 public:
  explicit InternalKeySliceTransform(const SliceTransform* transform)
      : transform_(transform) {}
  // Reports the name of the wrapped transform.
  virtual const char* Name() const { return transform_->Name(); }
  // Applies the wrapped transform to ExtractUserKey(src).
  virtual Slice Transform(const Slice& src) const {
    return transform_->Transform(ExtractUserKey(src));
  }
  // Asks the wrapped transform whether ExtractUserKey(src) is in its domain.
  virtual bool InDomain(const Slice& src) const {
    return transform_->InDomain(ExtractUserKey(src));
  }
  // Asks the wrapped transform whether ExtractUserKey(dst) is in its range.
  virtual bool InRange(const Slice& dst) const {
    return transform_->InRange(ExtractUserKey(dst));
  }
  // Gives access to the wrapped transform.
  const SliceTransform* user_prefix_extractor() const { return transform_; }
 private:
  // Not owned: as with comparators, this wrapper never deletes transform_,
  // so the wrapped transform is managed elsewhere.
  const SliceTransform* const transform_;
 };
 }  // namespace rocksdb
--- a/include/rocksdb/table.h
+++ b/include/rocksdb/table.h
@ -60,6 +60,12 @@ struct BlockBasedTableOptions {
    // A space efficient index block that is optimized for
    // binary-search-based index.
    kBinarySearch,
    // The hash index, if enabled, will do the hash lookup when
    // `ReadOptions.prefix_seek == true`. User should also specify
    // `Options.prefix_extractor` to allow the index block to correctly
    // extract the prefix of the given key and perform hash table lookup.
    kHashSearch,
  };
  IndexType index_type = kBinarySearch;
--- a/table/block.cc
+++ b/table/block.cc
@ -11,16 +11,20 @@
 #include "table/block.h"
 #include <vector>
 #include <algorithm>
 #include <string>
 #include <unordered_map>
 #include <vector>
 #include "rocksdb/comparator.h"
 #include "table/block_hash_index.h"
 #include "table/format.h"
 #include "util/coding.h"
 #include "util/logging.h"
 namespace rocksdb {
-inline uint32_t Block::NumRestarts() const {
+uint32_t Block::NumRestarts() const {
  assert(size_ >= 2*sizeof(uint32_t));
  return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
 }
@ -92,6 +96,7 @@ class Block::Iter : public Iterator {
  std::string key_;
  Slice value_;
  Status status_;
  BlockHashIndex* hash_index_;
  inline int Compare(const Slice& a, const Slice& b) const {
    return comparator_->Compare(a, b);
@ -118,16 +123,15 @@ class Block::Iter : public Iterator {
  }
 public:
-  Iter(const Comparator* comparator,
+  Iter(const Comparator* comparator, const char* data, uint32_t restarts,
-       const char* data,
+       uint32_t num_restarts, BlockHashIndex* hash_index)
       uint32_t restarts,
       uint32_t num_restarts)
      : comparator_(comparator),
        data_(data),
        restarts_(restarts),
        num_restarts_(num_restarts),
        current_(restarts_),
-        restart_index_(num_restarts_) {
+        restart_index_(num_restarts_),
        hash_index_(hash_index) {
    assert(num_restarts_ > 0);
  }
@ -169,45 +173,22 @@ class Block::Iter : public Iterator {
  }
  virtual void Seek(const Slice& target) {
-    // Binary search in restart array to find the first restart point
+    uint32_t index = 0;
-    // with a key >= target
+    bool ok = hash_index_ ? HashSeek(target, &index)
-    uint32_t left = 0;
+                          : BinarySeek(target, 0, num_restarts_ - 1, &index);
    uint32_t right = num_restarts_ - 1;
    while (left < right) {
      uint32_t mid = (left + right + 1) / 2;
      uint32_t region_offset = GetRestartPoint(mid);
      uint32_t shared, non_shared, value_length;
      const char* key_ptr = DecodeEntry(data_ + region_offset,
                                        data_ + restarts_,
                                        &shared, &non_shared, &value_length);
      if (key_ptr == nullptr || (shared != 0)) {
        CorruptionError();
        return;
      }
      Slice mid_key(key_ptr, non_shared);
      if (Compare(mid_key, target) < 0) {
        // Key at "mid" is smaller than "target".  Therefore all
        // blocks before "mid" are uninteresting.
        left = mid;
      } else {
        // Key at "mid" is >= "target".  Therefore all blocks at or
        // after "mid" are uninteresting.
        right = mid - 1;
      }
    }
    if (!ok) {
      return;
    }
    SeekToRestartPoint(index);
    // Linear search (within restart block) for first key >= target
-    SeekToRestartPoint(left);
+
    while (true) {
-      if (!ParseNextKey()) {
+      if (!ParseNextKey() || Compare(key_, target) >= 0) {
        return;
      }
      if (Compare(key_, target) >= 0) {
        return;
      }
    }
  }
  virtual void SeekToFirst() {
    SeekToRestartPoint(0);
    ParseNextKey();
@ -257,6 +238,53 @@ class Block::Iter : public Iterator {
      return true;
    }
  }
  // Binary search over the restart array, restricted to the inclusive index
  // range [left, right].
  //
  // On success stores in *index the largest restart index in the range whose
  // key compares less than `target`; if no probed key is smaller, *index is
  // the initial `left` (the key at the initial `left` itself is never
  // compared). The caller is expected to scan forward from that restart
  // point to reach the first key >= target.
  //
  // Returns false, after calling CorruptionError(), when a restart entry
  // cannot be decoded or unexpectedly shares bytes with a previous key;
  // returns true otherwise.
  bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
                  uint32_t* index) {
    assert(left <= right);
    while (left < right) {
      // Round the midpoint up so that "left = mid" always makes progress.
      uint32_t mid = (left + right + 1) / 2;
      uint32_t region_offset = GetRestartPoint(mid);
      uint32_t shared, non_shared, value_length;
      const char* key_ptr =
          DecodeEntry(data_ + region_offset, data_ + restarts_, &shared,
                      &non_shared, &value_length);
      // A key at a restart point is expected to be stored in full
      // (shared == 0); anything else is treated as corruption.
      if (key_ptr == nullptr || (shared != 0)) {
        CorruptionError();
        return false;
      }
      Slice mid_key(key_ptr, non_shared);
      if (Compare(mid_key, target) < 0) {
        // Key at "mid" is smaller than "target". Therefore all
        // blocks before "mid" are uninteresting.
        left = mid;
      } else {
        // Key at "mid" is >= "target". Therefore all blocks at or
        // after "mid" are uninteresting.
        right = mid - 1;
      }
    }
    *index = left;
    return true;
  }
  // Uses the hash index to narrow down the restart points that have to be
  // searched for `target`.
  //
  // Returns false when the hash index has no entry for `target`; in that case
  // current_ is parked at restarts_ (presumably leaving the iterator invalid,
  // as described for hash lookups in block.h). Otherwise binary-searches the
  // restart range reported by the index, stores the chosen restart point in
  // *index, and returns BinarySeek()'s result.
  bool HashSeek(const Slice& target, uint32_t* index) {
    assert(hash_index_);
    auto restart_index = hash_index_->GetRestartIndex(target);
    if (restart_index == nullptr) {
      current_ = restarts_;
      return false;
    }
    // The restart points in [first_index, first_index + num_blocks) all share
    // the same prefix, so the binary search is confined to that small range.
    auto left = restart_index->first_index;
    auto right = restart_index->first_index + restart_index->num_blocks - 1;
    return BinarySeek(target, left, right, index);
  }
 };
 Iterator* Block::NewIterator(const Comparator* cmp) {
@ -267,8 +295,13 @@ Iterator* Block::NewIterator(const Comparator* cmp) {
  if (num_restarts == 0) {
    return NewEmptyIterator();
  } else {
-    return new Iter(cmp, data_, restart_offset_, num_restarts);
+    return new Iter(cmp, data_, restart_offset_, num_restarts,
                    hash_index_.get());
  }
 }
 // Installs `hash_index` as this block's hash index. hash_index_ is a
 // std::unique_ptr, so the block takes ownership of the pointer and reset()
 // destroys any index that was installed before.
 void Block::SetBlockHashIndex(BlockHashIndex* hash_index) {
  hash_index_.reset(hash_index);
 }
 }  // namespace rocksdb
--- a/table/block.h
+++ b/table/block.h
@ -10,6 +10,7 @@
 #pragma once
 #include <stddef.h>
 #include <stdint.h>
 #include "rocksdb/iterator.h"
 #include "rocksdb/options.h"
@ -17,6 +18,7 @@ namespace rocksdb {
 struct BlockContents;
 class Comparator;
 class BlockHashIndex;
 class Block {
 public:
@ -26,20 +28,28 @@ class Block {
  ~Block();
  size_t size() const { return size_; }
-  bool   cachable() const { return cachable_; }
+  const char* data() const { return data_; }
  bool cachable() const { return cachable_; }
  uint32_t NumRestarts() const;
  CompressionType compression_type() const { return compression_type_; }
  // If hash index lookup is enabled and `use_hash_index` is true, this block
  // will do hash lookup for the key prefix.
  //
  // NOTE: for the hash based lookup, if a key prefix doesn't match any key,
  // the iterator will simply be set as "invalid", rather than returning
  // the key that is just past the target key.
  Iterator* NewIterator(const Comparator* comparator);
-  const char* data() { return data_; }
+  void SetBlockHashIndex(BlockHashIndex* hash_index);
 private:
  uint32_t NumRestarts() const;
  const char* data_;
  size_t size_;
  uint32_t restart_offset_;     // Offset in data_ of restart array
  bool owned_;                  // Block owns data_[]
  bool cachable_;
  CompressionType compression_type_;
  std::unique_ptr<BlockHashIndex> hash_index_;
  // No copying allowed
  Block(const Block&);
--- a/table/block_based_table_builder.cc
+++ b/table/block_based_table_builder.cc
@ -97,9 +97,9 @@ class IndexBuilder {
 //  2. Shorten the key length for index block. Other than honestly using the
 //     last key in the data block as the index key, we instead find a shortest
 //     substitute key that serves the same function.
-class BinarySearchIndexBuilder : public IndexBuilder {
+class ShortenedIndexBuilder : public IndexBuilder {
 public:
-  explicit BinarySearchIndexBuilder(const Comparator* comparator)
+  explicit ShortenedIndexBuilder(const Comparator* comparator)
      : IndexBuilder(comparator),
        index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
@ -128,11 +128,41 @@ class BinarySearchIndexBuilder : public IndexBuilder {
  BlockBuilder index_block_builder_;
 };
 // FullKeyIndexBuilder is also built on top of BlockBuilder. It works much
 // like ShortenedIndexBuilder, but stores the full last key of each data block
 // instead of a shortened substitute key, because the hash index is based on
 // the key "prefix".
 class FullKeyIndexBuilder : public IndexBuilder {
 public:
  explicit FullKeyIndexBuilder(const Comparator* comparator)
      : IndexBuilder(comparator),
        index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
  // Appends one index entry that maps the unmodified last key of the current
  // data block to the encoded `block_handle`.
  // `first_key_in_next_block` is not used by this builder.
  virtual void AddEntry(std::string* last_key_in_current_block,
                        const Slice* first_key_in_next_block,
                        const BlockHandle& block_handle) override {
    std::string encoded_handle;
    block_handle.EncodeTo(&encoded_handle);
    index_block_builder_.Add(*last_key_in_current_block, encoded_handle);
  }
  // Finishes the underlying block builder and returns what it produced.
  virtual Slice Finish() override {
    return index_block_builder_.Finish();
  }
  // Reports the current size estimate of the underlying block builder.
  virtual size_t EstimatedSize() const {
    return index_block_builder_.CurrentSizeEstimate();
  }
 private:
  BlockBuilder index_block_builder_;
 };
 // Create a index builder based on its type.
 IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator) {
  switch (type) {
    case BlockBasedTableOptions::kBinarySearch: {
-      return new BinarySearchIndexBuilder(comparator);
+      return new ShortenedIndexBuilder(comparator);
    }
    case BlockBasedTableOptions::kHashSearch: {
      return new FullKeyIndexBuilder(comparator);
    }
    default: {
      assert(!"Do not recognize the index type ");
--- a/table/block_based_table_reader.cc
+++ b/table/block_based_table_reader.cc
@ -25,6 +25,7 @@
 #include "table/block.h"
 #include "table/filter_block.h"
 #include "table/block_hash_index.h"
 #include "table/format.h"
 #include "table/meta_blocks.h"
 #include "table/two_level_iterator.h"
@ -180,19 +181,51 @@ class BinarySearchIndexReader : public IndexReader {
  std::unique_ptr<Block> index_block_;
 };
 // TODO(kailiu) This class is only a stub for now. And the comment below is also
 // not completed.
 // Index that leverages an internal hash table to quicken the lookup for a given
 // key.
 // @param data_iter_gen, equivalent to BlockBasedTable::NewIterator(). But that
 // function requires the index to be initialized. To avoid this problem the
 // external caller will pass a function that can create the iterator over the
 // entries without the table being fully initialized.
 class HashIndexReader : public IndexReader {
 public:
  static Status Create(RandomAccessFile* file, const BlockHandle& index_handle,
                       Env* env, const Comparator* comparator,
-                       BlockBasedTable* table,
+                       std::function<Iterator*(Iterator*)> data_iter_gen,
                       const SliceTransform* prefix_extractor,
                       IndexReader** index_reader) {
-    return Status::NotSupported("not implemented yet!");
+    assert(prefix_extractor);
    Block* index_block = nullptr;
    auto s =
        ReadBlockFromFile(file, ReadOptions(), index_handle, &index_block, env);
    if (!s.ok()) {
      return s;
    }
    *index_reader = new HashIndexReader(comparator, index_block);
    std::unique_ptr<Iterator> index_iter(index_block->NewIterator(nullptr));
    std::unique_ptr<Iterator> data_iter(
        data_iter_gen(index_block->NewIterator(nullptr)));
    auto hash_index = CreateBlockHashIndex(index_iter.get(), data_iter.get(),
                                           index_block->NumRestarts(),
                                           comparator, prefix_extractor);
    index_block->SetBlockHashIndex(hash_index);
    return s;
  }
  virtual Iterator* NewIterator() override {
    return index_block_->NewIterator(comparator_);
  }
  virtual size_t size() const override { return index_block_->size(); }
 private:
  HashIndexReader(const Comparator* comparator, Block* index_block)
      : IndexReader(comparator), index_block_(index_block) {
    assert(index_block_ != nullptr);
  }
  std::unique_ptr<Block> index_block_;
 };
@ -223,6 +256,11 @@ struct BlockBasedTable::Rep {
  std::shared_ptr<const TableProperties> table_properties;
  BlockBasedTableOptions::IndexType index_type;
  // TODO(kailiu) It is very ugly to use internal key in table, since table
  // module should not be relying on db module. However to make things easier
  // and compatible with existing code, we introduce a wrapper that allows
  // block to extract prefix without knowing if a key is internal or not.
  unique_ptr<SliceTransform> internal_prefix_transform;
 };
 BlockBasedTable::~BlockBasedTable() {
@ -747,8 +785,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
  return { filter, cache_handle };
 }
-Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options)
+Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
    const {
  // index reader has already been pre-populated.
  if (rep_->index_reader) {
    return rep_->index_reader->NewIterator();
@ -978,7 +1015,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
 //  3. options
 //  4. internal_comparator
 //  5. index_type
-Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const {
+Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) {
  // Some old version of block-based tables don't have index type present in
  // table properties. If that's the case we can safely use the kBinarySearch.
  auto index_type = BlockBasedTableOptions::kBinarySearch;
@ -989,11 +1026,30 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) const {
        DecodeFixed32(pos->second.c_str()));
  }
  auto file = rep_->file.get();
  const auto& index_handle = rep_->index_handle;
  auto env = rep_->options.env;
  auto comparator = &rep_->internal_comparator;
  switch (index_type) {
    case BlockBasedTableOptions::kBinarySearch: {
-      return BinarySearchIndexReader::Create(
+      return BinarySearchIndexReader::Create(file, index_handle, env,
-          rep_->file.get(), rep_->index_handle, rep_->options.env,
+                                             comparator, index_reader);
-          &rep_->internal_comparator, index_reader);
+    }
    case BlockBasedTableOptions::kHashSearch: {
      // We need to wrap data with internal_prefix_transform to make sure it can
      // handle prefix correctly.
      rep_->internal_prefix_transform.reset(
          new InternalKeySliceTransform(rep_->options.prefix_extractor.get()));
      return HashIndexReader::Create(
          file, index_handle, env, comparator,
          [&](Iterator* index_iter) {
            return NewTwoLevelIterator(
                index_iter, &BlockBasedTable::DataBlockReader,
                const_cast<BlockBasedTable*>(this), ReadOptions(),
                rep_->soptions, rep_->internal_comparator);
          },
          rep_->internal_prefix_transform.get(), index_reader);
    }
    default: {
      std::string error_message =
--- a/table/block_based_table_reader.h
+++ b/table/block_based_table_reader.h
@ -131,7 +131,7 @@ class BlockBasedTable : public TableReader {
  //  2. index is not present in block cache.
  //  3. We disallowed any io to be performed, that is, read_options ==
  //     kBlockCacheTier
-  Iterator* NewIndexIterator(const ReadOptions& read_options) const;
+  Iterator* NewIndexIterator(const ReadOptions& read_options);
  // Read block cache from block caches (if set): block_cache and
  // block_cache_compressed.
@ -164,7 +164,7 @@ class BlockBasedTable : public TableReader {
  void ReadMeta(const Footer& footer);
  void ReadFilter(const Slice& filter_handle_value);
-  Status CreateIndexReader(IndexReader** index_reader) const;
+  Status CreateIndexReader(IndexReader** index_reader);
  // Read the meta block from sst.
  static Status ReadMetaBlock(
--- a/table/block_test.cc
+++ b/table/block_test.cc
@ -3,7 +3,10 @@
 //  LICENSE file in the root directory of this source tree. An additional grant
 //  of patent rights can be found in the PATENTS file in the same directory.
 //
 #include <stdio.h>
 #include <string>
 #include <vector>
 #include "db/dbformat.h"
 #include "db/memtable.h"
 #include "db/write_batch_internal.h"
@ -11,9 +14,11 @@
 #include "rocksdb/env.h"
 #include "rocksdb/iterator.h"
 #include "rocksdb/table.h"
 #include "rocksdb/slice_transform.h"
 #include "table/block.h"
 #include "table/block_builder.h"
 #include "table/format.h"
 #include "table/block_hash_index.h"
 #include "util/random.h"
 #include "util/testharness.h"
 #include "util/testutil.h"
@ -25,6 +30,40 @@ static std::string RandomString(Random* rnd, int len) {
  test::RandomString(rnd, len, &r);
  return r;
 }
 // Builds a test key: `primary_key` printed with width 6 followed by
 // `secondary_key` printed with width 4. When `padding_size` is non-zero the
 // result of RandomString(rnd, padding_size) is appended; `rnd` is not touched
 // otherwise.
 std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
                         Random *rnd) {
  char formatted[50];
  snprintf(formatted, sizeof(formatted), "%6d%4d", primary_key, secondary_key);
  std::string key(formatted);
  if (padding_size != 0) {
    key += RandomString(rnd, padding_size);
  }
  return key;
 }
 // Generate random key value pairs.
 // The generated key will be sorted. You can tune the parameters to generated
 // different kinds of test key/value pairs for different scenario.
 void GenerateRandomKVs(std::vector<std::string> *keys,
                       std::vector<std::string> *values, const int from,
                       const int len, const int step = 1,
                       const int padding_size = 0,
                       const int keys_share_prefix = 1) {
  Random rnd(302);
  // generate different prefix
  for (int i = from; i < from + len; i += step) {
    // generating keys that shares the prefix
    for (int j = 0; j < keys_share_prefix; ++j) {
      keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
      // 100 bytes values
      values->emplace_back(RandomString(&rnd, 100));
    }
  }
 }
 class BlockTest {};
@ -39,24 +78,11 @@ TEST(BlockTest, SimpleTest) {
  std::vector<std::string> values;
  BlockBuilder builder(options, ic.get());
  int num_records = 100000;
  char buf[10];
  char* p = &buf[0];
  GenerateRandomKVs(&keys, &values, 0, num_records);
  // add a bunch of records to a block
  for (int i = 0; i < num_records; i++) {
-    // generate random kvs
+    builder.Add(keys[i], values[i]);
    sprintf(p, "%6d", i);
    std::string k(p);
    std::string v = RandomString(&rnd, 100); // 100 byte values
    // write kvs to the block
    Slice key(k);
    Slice value(v);
    builder.Add(key, value);
    // remember kvs in a lookaside array
    keys.push_back(k);
    values.push_back(v);
  }
  // read serialized contents of the block
@ -101,6 +127,114 @@ TEST(BlockTest, SimpleTest) {
  delete iter;
 }
 // Builds a block out of `keys`/`values` and returns its contents.
 //
 // `*builder` is replaced with a fresh BlockBuilder (restart interval 1,
 // BytewiseComparator). The returned BlockContents is marked as neither
 // cachable nor heap allocated, and its data is the Slice returned by the
 // builder's Finish(); presumably that Slice refers to memory owned by the
 // builder, which would be why the builder is handed back to the caller --
 // TODO confirm against BlockBuilder.
 // NOTE(review): `prefix_group_size` is not used in this function.
 BlockContents GetBlockContents(std::unique_ptr<BlockBuilder> *builder,
                                const std::vector<std::string> &keys,
                                const std::vector<std::string> &values,
                                const int prefix_group_size = 1) {
  builder->reset(
      new BlockBuilder(1 /* restart interval */, BytewiseComparator()));
  // Add every key/value pair to the block.
  for (size_t i = 0; i < keys.size(); ++i) {
    (*builder)->Add(keys[i], values[i]);
  }
  Slice rawblock = (*builder)->Finish();
  BlockContents contents;
  contents.data = rawblock;
  contents.cachable = false;
  contents.heap_allocated = false;
  return contents;
 }
 // Verifies hash-index lookups against a block built from `keys`/`values`.
 //
 // Two Block readers are created over the same `contents`: reader1 gets a
 // hash index installed, reader2 is left as a plain reader. The test then
 // checks that
 //  - seeking any existing key through the hash-indexed iterator lands on an
 //    entry with the matching value, and
 //  - seeking the odd primary ids below `max_key` leaves the hash-indexed
 //    iterator invalid while the plain iterator is still valid. The callers
 //    visible in this file generate keys with step 2 starting at 0, so odd
 //    ids are absent from the block.
 void CheckBlockContents(BlockContents contents, const int max_key,
                         const std::vector<std::string> &keys,
                         const std::vector<std::string> &values) {
  // Same width as the primary-key field written by GenerateKey() ("%6d").
  const size_t prefix_size = 6;
  // create block reader
  Block reader1(contents);
  Block reader2(contents);
  std::unique_ptr<const SliceTransform> prefix_extractor(
      NewFixedPrefixTransform(prefix_size));
  {
    // Both iterators are only needed while the hash index is being built;
    // they are deleted as soon as the index has been installed on reader1.
    auto iter1 = reader1.NewIterator(nullptr);
    auto iter2 = reader1.NewIterator(nullptr);
    reader1.SetBlockHashIndex(CreateBlockHashIndex(iter1, iter2, keys.size(),
                                                   BytewiseComparator(),
                                                   prefix_extractor.get()));
    delete iter1;
    delete iter2;
  }
  // hash_iter is created after the hash index has been installed on reader1;
  // reader2 never gets one.
  std::unique_ptr<Iterator> hash_iter(
      reader1.NewIterator(BytewiseComparator()));
  std::unique_ptr<Iterator> regular_iter(
      reader2.NewIterator(BytewiseComparator()));
  // Seek existent keys
  for (size_t i = 0; i < keys.size(); i++) {
    hash_iter->Seek(keys[i]);
    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(hash_iter->Valid());
    Slice v = hash_iter->value();
    ASSERT_EQ(v.ToString().compare(values[i]), 0);
  }
  // Seek non-existent keys.
  // For the hash index, if no key with the given prefix is found, the
  // iterator will simply be set as invalid; whereas the binary search based
  // iterator will return the one that is closest.
  for (int i = 1; i < max_key - 1; i += 2) {
    // No padding is requested, so passing a null Random is fine here.
    auto key = GenerateKey(i, 0, 0, nullptr);
    hash_iter->Seek(key);
    ASSERT_TRUE(!hash_iter->Valid());
    regular_iter->Seek(key);
    ASSERT_TRUE(regular_iter->Valid());
  }
 }
 // In this test case, no two keys share the same prefix: one key is generated
 // for every second primary id below kMaxKey, and CheckBlockContents() hashes
 // on the primary-id part of the key.
 TEST(BlockTest, SimpleIndexHash) {
  const int kMaxKey = 100000;
  std::vector<std::string> keys;
  std::vector<std::string> values;
  GenerateRandomKVs(&keys, &values, 0 /* first key id */,
                    kMaxKey /* length of the key id range */, 2 /* step */,
                    8 /* padding size (8 bytes randomly generated suffix) */);
  // The builder is kept alive here because GetBlockContents() returns
  // contents based on the builder's Finish() result.
  std::unique_ptr<BlockBuilder> builder;
  auto contents = GetBlockContents(&builder, keys, values);
  CheckBlockContents(contents, kMaxKey, keys, values);
 }
 // Same check as SimpleIndexHash, but every generated prefix is shared by
 // kPrefixGroup keys.
 TEST(BlockTest, IndexHashWithSharedPrefix) {
  const int kMaxKey = 100000;
  // for each prefix, there will be 5 keys that start with it.
  const int kPrefixGroup = 5;
  std::vector<std::string> keys;
  std::vector<std::string> values;
  // Generate keys with same prefix.
  GenerateRandomKVs(&keys, &values, 0,  // first key id
                    kMaxKey,            // length of the key id range
                    2,                  // step
                    10,                 // padding size,
                    kPrefixGroup);
  // The builder is kept alive here because GetBlockContents() returns
  // contents based on the builder's Finish() result.
  std::unique_ptr<BlockBuilder> builder;
  auto contents = GetBlockContents(&builder, keys, values, kPrefixGroup);
  CheckBlockContents(contents, kMaxKey, keys, values);
 }
 }  // namespace rocksdb
 int main(int argc, char** argv) {
--- a/table/table_test.cc
+++ b/table/table_test.cc
@ -1055,6 +1055,116 @@ static std::string RandomString(Random* rnd, int len) {
  return r;
 }
 // Adds one entry with value "v" to `c`. The entry's key is an InternalKey
 // (sequence 0, kTypeValue) whose user key is `prefix` followed by
 // RandomString(&rnd, suffix_len).
 //
 // `suffix_len` controls the length argument passed to RandomString(); the
 // default of 800 keeps existing callers unchanged. The random generator is a
 // function-local static seeded with 1023, so successive calls continue the
 // same sequence.
 void AddInternalKey(TableConstructor* c, const std::string& prefix,
                     int suffix_len = 800) {
  static Random rnd(1023);
  InternalKey k(prefix + RandomString(&rnd, suffix_len), 0, kTypeValue);
  c->Add(k.Encode().ToString(), "v");
 }
 // End-to-end check of the hash index on a block-based table: builds a table
 // with index_type == kHashSearch and a 3-byte fixed prefix extractor, then
 // exercises Seek() with (1) bare prefixes, (2) existing keys, (3) keys larger
 // than every key of a prefix and (4) prefixes that do not exist at all.
 TEST(TableTest, HashIndexTest) {
  TableConstructor c(BytewiseComparator());
  // keys with prefix length 3, make sure the key/value is big enough to fill
  // one block
  AddInternalKey(&c, "0015");
  AddInternalKey(&c, "0035");
  AddInternalKey(&c, "0054");
  AddInternalKey(&c, "0055");
  AddInternalKey(&c, "0056");
  AddInternalKey(&c, "0057");
  AddInternalKey(&c, "0058");
  AddInternalKey(&c, "0075");
  AddInternalKey(&c, "0076");
  AddInternalKey(&c, "0095");
  std::vector<std::string> keys;
  KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_options;
  table_options.index_type = BlockBasedTableOptions::kHashSearch;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  options.block_cache = NewLRUCache(1024);
  options.block_size = 1700;
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  c.Finish(options, *comparator, &keys, &kvmap);
  auto reader = c.table_reader();
  auto props = c.table_reader()->GetTableProperties();
  // The ten keys above are expected to be spread over five data blocks.
  ASSERT_EQ(5u, props->num_data_blocks);
  std::unique_ptr<Iterator> hash_iter(reader->NewIterator(ReadOptions()));
  // -- Find keys do not exist, but have common prefix.
  std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
  // lower_bound[i] is the first added key that starts with prefixes[i].
  std::vector<std::string> lower_bound = {keys[0], keys[1], keys[2],
                                          keys[7], keys[9], };
  // find the lower bound of the prefix
  for (size_t i = 0; i < prefixes.size(); ++i) {
    hash_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
    ASSERT_OK(hash_iter->status());
    ASSERT_TRUE(hash_iter->Valid());
    // seek the first element in the block
    ASSERT_EQ(lower_bound[i], hash_iter->key().ToString());
    ASSERT_EQ("v", hash_iter->value().ToString());
  }
  // find the upper bound of prefixes
  // upper_bound[i] is the first added key of the prefix after prefixes[i];
  // the last prefix has no successor, hence only four entries.
  std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };
  // find existing keys
  // NOTE(review): this loop seeks with the bare user key, while the other
  // loops wrap the key in InternalKey(...).Encode() -- confirm this is
  // intended.
  for (const auto& item : kvmap) {
    auto ukey = ExtractUserKey(item.first).ToString();
    hash_iter->Seek(ukey);
    // ASSERT_OK(regular_iter->status());
    ASSERT_OK(hash_iter->status());
    // ASSERT_TRUE(regular_iter->Valid());
    ASSERT_TRUE(hash_iter->Valid());
    ASSERT_EQ(item.first, hash_iter->key().ToString());
    ASSERT_EQ(item.second, hash_iter->value().ToString());
  }
  for (size_t i = 0; i < prefixes.size(); ++i) {
    // the key is greater than any existing keys.
    auto key = prefixes[i] + "9";
    hash_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());
    ASSERT_OK(hash_iter->status());
    if (i == prefixes.size() - 1) {
      // last key
      ASSERT_TRUE(!hash_iter->Valid());
    } else {
      ASSERT_TRUE(hash_iter->Valid());
      // the iterator is expected to land on the first key of the next prefix
      ASSERT_EQ(upper_bound[i], hash_iter->key().ToString());
      ASSERT_EQ("v", hash_iter->value().ToString());
    }
  }
  // find keys with prefix that don't match any of the existing prefixes.
  std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
  for (const auto& prefix : non_exist_prefixes) {
    hash_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode());
    // regular_iter->Seek(prefix);
    ASSERT_OK(hash_iter->status());
    // With the hash index an unknown prefix leaves the iterator invalid.
    ASSERT_TRUE(!hash_iter->Valid());
  }
 }
 // It's very hard to figure out the index block size of a block accurately.
 // To make sure we get the index size, we just make sure as key number
 // grows, the filter block size also grows.