Add BlockBasedTableOptions::index_block_restart_interval

Summary: Add a new option to BlockBasedTableOptions that will allow us to change the restart interval for the index block

Test Plan: unit tests

Reviewers: yhchiang, anthony, andrewkr, sdong

Reviewed By: sdong

Subscribers: march, dhruba

Differential Revision: https://reviews.facebook.net/D53721
This commit is contained in:
Islam AbdelRahman 2016-02-05 10:22:37 -08:00 committed by sdong
parent 8d31cca4f9
commit bfde4d73b7
10 changed files with 102 additions and 13 deletions

View File

@ -5272,6 +5272,7 @@ class DBTestRandomized : public DBTest,
option_configs.push_back(option_config);
}
}
option_configs.push_back(kBlockBasedTableWithIndexRestartInterval);
return option_configs;
}
};

View File

@ -339,6 +339,10 @@ Options DBTestBase::CurrentOptions(
options.prefix_extractor.reset(NewNoopTransform());
break;
}
case kBlockBasedTableWithIndexRestartInterval: {
table_options.index_block_restart_interval = 8;
break;
}
case kOptimizeFiltersForHits: {
options.optimize_filters_for_hits = true;
set_block_based_table_factory = true;

View File

@ -529,6 +529,7 @@ class DBTestBase : public testing::Test {
kEnd = 31,
kLevelSubcompactions = 31,
kUniversalSubcompactions = 32,
kBlockBasedTableWithIndexRestartInterval = 33,
};
int option_config_;

View File

@ -120,6 +120,9 @@ struct BlockBasedTableOptions {
// value will be silently overwritten with 1.
int block_restart_interval = 16;
// Same as block_restart_interval but used for the index block.
int index_block_restart_interval = 1;
// Use delta encoding to compress keys in blocks.
// Iterator::PinData() requires this option to be disabled.
//

View File

@ -113,15 +113,17 @@ class IndexBuilder {
//
// Optimizations:
// 1. Made block's `block_restart_interval` to be 1, which will avoid linear
// search when doing index lookup.
// search when doing index lookup (can be disabled by setting
// index_block_restart_interval).
// 2. Shorten the key length for index block. Other than honestly using the
// last key in the data block as the index key, we instead find a shortest
// substitute key that serves the same function.
class ShortenedIndexBuilder : public IndexBuilder {
public:
explicit ShortenedIndexBuilder(const Comparator* comparator)
explicit ShortenedIndexBuilder(const Comparator* comparator,
int index_block_restart_interval)
: IndexBuilder(comparator),
index_block_builder_(1 /* block_restart_interval == 1 */) {}
index_block_builder_(index_block_restart_interval) {}
virtual void AddIndexEntry(std::string* last_key_in_current_block,
const Slice* first_key_in_next_block,
@ -178,9 +180,10 @@ class ShortenedIndexBuilder : public IndexBuilder {
class HashIndexBuilder : public IndexBuilder {
public:
explicit HashIndexBuilder(const Comparator* comparator,
const SliceTransform* hash_key_extractor)
const SliceTransform* hash_key_extractor,
int index_block_restart_interval)
: IndexBuilder(comparator),
primary_index_builder_(comparator),
primary_index_builder_(comparator, index_block_restart_interval),
hash_key_extractor_(hash_key_extractor) {}
virtual void AddIndexEntry(std::string* last_key_in_current_block,
@ -266,13 +269,16 @@ namespace {
// Create a index builder based on its type.
IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator,
const SliceTransform* prefix_extractor) {
const SliceTransform* prefix_extractor,
int index_block_restart_interval) {
switch (type) {
case BlockBasedTableOptions::kBinarySearch: {
return new ShortenedIndexBuilder(comparator);
return new ShortenedIndexBuilder(comparator,
index_block_restart_interval);
}
case BlockBasedTableOptions::kHashSearch: {
return new HashIndexBuilder(comparator, prefix_extractor);
return new HashIndexBuilder(comparator, prefix_extractor,
index_block_restart_interval);
}
default: {
assert(!"Do not recognize the index type ");
@ -484,9 +490,10 @@ struct BlockBasedTableBuilder::Rep {
data_block(table_options.block_restart_interval,
table_options.use_delta_encoding),
internal_prefix_transform(_ioptions.prefix_extractor),
index_builder(CreateIndexBuilder(table_options.index_type,
&internal_comparator,
&this->internal_prefix_transform)),
index_builder(
CreateIndexBuilder(table_options.index_type, &internal_comparator,
&this->internal_prefix_transform,
table_options.index_block_restart_interval)),
compression_type(_compression_type),
compression_opts(_compression_opts),
filter_block(skip_filters ? nullptr : CreateFilterBlockBuilder(

View File

@ -42,6 +42,9 @@ BlockBasedTableFactory::BlockBasedTableFactory(
if (table_options_.block_restart_interval < 1) {
table_options_.block_restart_interval = 1;
}
if (table_options_.index_block_restart_interval < 1) {
table_options_.index_block_restart_interval = 1;
}
}
Status BlockBasedTableFactory::NewTableReader(
@ -150,6 +153,9 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const {
snprintf(buffer, kBufferSize, " block_restart_interval: %d\n",
table_options_.block_restart_interval);
ret.append(buffer);
snprintf(buffer, kBufferSize, " index_block_restart_interval: %d\n",
table_options_.index_block_restart_interval);
ret.append(buffer);
snprintf(buffer, kBufferSize, " filter_policy: %s\n",
table_options_.filter_policy == nullptr ?
"nullptr" : table_options_.filter_policy->Name());

View File

@ -637,6 +637,7 @@ class HarnessTest : public testing::Test {
new FlushBlockBySizePolicyFactory());
table_options_.block_size = 256;
table_options_.block_restart_interval = args.restart_interval;
table_options_.index_block_restart_interval = args.restart_interval;
table_options_.format_version = args.format_version;
options_.table_factory.reset(
new BlockBasedTableFactory(table_options_));
@ -2282,6 +2283,67 @@ TEST_F(HarnessTest, FooterTests) {
}
}
class IndexBlockRestartIntervalTest
: public BlockBasedTableTest,
public ::testing::WithParamInterface<int> {
public:
static std::vector<int> GetRestartValues() { return {-1, 0, 1, 8, 16, 32}; }
};
INSTANTIATE_TEST_CASE_P(
IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));
TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
const int kKeysInTable = 10000;
const int kKeySize = 100;
const int kValSize = 500;
int index_block_restart_interval = GetParam();
Options options;
BlockBasedTableOptions table_options;
table_options.block_size = 64; // small block size to get big index block
table_options.index_block_restart_interval = index_block_restart_interval;
options.table_factory.reset(new BlockBasedTableFactory(table_options));
TableConstructor c(BytewiseComparator());
static Random rnd(301);
for (int i = 0; i < kKeysInTable; i++) {
InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue);
c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
}
std::vector<std::string> keys;
stl_wrappers::KVMap kvmap;
std::unique_ptr<InternalKeyComparator> comparator(
new InternalKeyComparator(BytewiseComparator()));
const ImmutableCFOptions ioptions(options);
c.Finish(options, ioptions, table_options, *comparator, &keys, &kvmap);
auto reader = c.GetTableReader();
std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(ReadOptions()));
// Test point lookup
for (auto& kv : kvmap) {
db_iter->Seek(kv.first);
ASSERT_TRUE(db_iter->Valid());
ASSERT_OK(db_iter->status());
ASSERT_EQ(db_iter->key(), kv.first);
ASSERT_EQ(db_iter->value(), kv.second);
}
// Test iterating
auto kv_iter = kvmap.begin();
for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
ASSERT_EQ(db_iter->key(), kv_iter->first);
ASSERT_EQ(db_iter->value(), kv_iter->second);
kv_iter++;
}
ASSERT_EQ(kv_iter, kvmap.end());
}
} // namespace rocksdb
int main(int argc, char** argv) {

View File

@ -486,6 +486,9 @@ static std::unordered_map<std::string,
{"block_restart_interval",
{offsetof(struct BlockBasedTableOptions, block_restart_interval),
OptionType::kInt, OptionVerificationType::kNormal}},
{"index_block_restart_interval",
{offsetof(struct BlockBasedTableOptions, index_block_restart_interval),
OptionType::kInt, OptionVerificationType::kNormal}},
{"filter_policy",
{offsetof(struct BlockBasedTableOptions, filter_policy),
OptionType::kFilterPolicy, OptionVerificationType::kByName}},
@ -493,7 +496,7 @@ static std::unordered_map<std::string,
{offsetof(struct BlockBasedTableOptions, whole_key_filtering),
OptionType::kBoolean, OptionVerificationType::kNormal}},
{"skip_table_builder_flush",
{offsetof(struct BlockBasedTableOptions, skip_table_builder_flush),
{offsetof(struct BlockBasedTableOptions, skip_table_builder_flush),
OptionType::kBoolean, OptionVerificationType::kNormal}},
{"format_version",
{offsetof(struct BlockBasedTableOptions, format_version),

View File

@ -1575,7 +1575,8 @@ TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) {
"cache_index_and_filter_blocks=1;index_type=kHashSearch;"
"checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"
"block_cache=1M;block_cache_compressed=1k;block_size=1024;"
"block_size_deviation=8;block_restart_interval=4;"
"block_size_deviation=8;block_restart_interval=4; "
"index_block_restart_interval=4;"
"filter_policy=bloomfilter:4:true;whole_key_filtering=1;"
"skip_table_builder_flush=1;format_version=1;"
"hash_index_allow_collision=false;",

View File

@ -200,6 +200,7 @@ BlockBasedTableOptions RandomBlockBasedTableOptions(Random* rnd) {
opt.block_size = rnd->Uniform(10000000);
opt.block_size_deviation = rnd->Uniform(100);
opt.block_restart_interval = rnd->Uniform(100);
opt.index_block_restart_interval = rnd->Uniform(100);
opt.whole_key_filtering = rnd->Uniform(2);
return opt;