Add BlockBasedTableOptions::index_block_restart_interval
Summary: Add a new option to BlockBasedTableOptions that will allow us to change the restart interval for the index block Test Plan: unit tests Reviewers: yhchiang, anthony, andrewkr, sdong Reviewed By: sdong Subscribers: march, dhruba Differential Revision: https://reviews.facebook.net/D53721
This commit is contained in:
parent
8d31cca4f9
commit
bfde4d73b7
@ -5272,6 +5272,7 @@ class DBTestRandomized : public DBTest,
|
||||
option_configs.push_back(option_config);
|
||||
}
|
||||
}
|
||||
option_configs.push_back(kBlockBasedTableWithIndexRestartInterval);
|
||||
return option_configs;
|
||||
}
|
||||
};
|
||||
|
@ -339,6 +339,10 @@ Options DBTestBase::CurrentOptions(
|
||||
options.prefix_extractor.reset(NewNoopTransform());
|
||||
break;
|
||||
}
|
||||
case kBlockBasedTableWithIndexRestartInterval: {
|
||||
table_options.index_block_restart_interval = 8;
|
||||
break;
|
||||
}
|
||||
case kOptimizeFiltersForHits: {
|
||||
options.optimize_filters_for_hits = true;
|
||||
set_block_based_table_factory = true;
|
||||
|
@ -529,6 +529,7 @@ class DBTestBase : public testing::Test {
|
||||
kEnd = 31,
|
||||
kLevelSubcompactions = 31,
|
||||
kUniversalSubcompactions = 32,
|
||||
kBlockBasedTableWithIndexRestartInterval = 33,
|
||||
};
|
||||
int option_config_;
|
||||
|
||||
|
@ -120,6 +120,9 @@ struct BlockBasedTableOptions {
|
||||
// value will be silently overwritten with 1.
|
||||
int block_restart_interval = 16;
|
||||
|
||||
// Same as block_restart_interval but used for the index block.
|
||||
int index_block_restart_interval = 1;
|
||||
|
||||
// Use delta encoding to compress keys in blocks.
|
||||
// Iterator::PinData() requires this option to be disabled.
|
||||
//
|
||||
|
@ -113,15 +113,17 @@ class IndexBuilder {
|
||||
//
|
||||
// Optimizations:
|
||||
// 1. Made block's `block_restart_interval` to be 1, which will avoid linear
|
||||
// search when doing index lookup.
|
||||
// search when doing index lookup (can be disabled by setting
|
||||
// index_block_restart_interval).
|
||||
// 2. Shorten the key length for index block. Other than honestly using the
|
||||
// last key in the data block as the index key, we instead find a shortest
|
||||
// substitute key that serves the same function.
|
||||
class ShortenedIndexBuilder : public IndexBuilder {
|
||||
public:
|
||||
explicit ShortenedIndexBuilder(const Comparator* comparator)
|
||||
explicit ShortenedIndexBuilder(const Comparator* comparator,
|
||||
int index_block_restart_interval)
|
||||
: IndexBuilder(comparator),
|
||||
index_block_builder_(1 /* block_restart_interval == 1 */) {}
|
||||
index_block_builder_(index_block_restart_interval) {}
|
||||
|
||||
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
||||
const Slice* first_key_in_next_block,
|
||||
@ -178,9 +180,10 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
||||
class HashIndexBuilder : public IndexBuilder {
|
||||
public:
|
||||
explicit HashIndexBuilder(const Comparator* comparator,
|
||||
const SliceTransform* hash_key_extractor)
|
||||
const SliceTransform* hash_key_extractor,
|
||||
int index_block_restart_interval)
|
||||
: IndexBuilder(comparator),
|
||||
primary_index_builder_(comparator),
|
||||
primary_index_builder_(comparator, index_block_restart_interval),
|
||||
hash_key_extractor_(hash_key_extractor) {}
|
||||
|
||||
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
||||
@ -266,13 +269,16 @@ namespace {
|
||||
|
||||
// Create a index builder based on its type.
|
||||
IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator,
|
||||
const SliceTransform* prefix_extractor) {
|
||||
const SliceTransform* prefix_extractor,
|
||||
int index_block_restart_interval) {
|
||||
switch (type) {
|
||||
case BlockBasedTableOptions::kBinarySearch: {
|
||||
return new ShortenedIndexBuilder(comparator);
|
||||
return new ShortenedIndexBuilder(comparator,
|
||||
index_block_restart_interval);
|
||||
}
|
||||
case BlockBasedTableOptions::kHashSearch: {
|
||||
return new HashIndexBuilder(comparator, prefix_extractor);
|
||||
return new HashIndexBuilder(comparator, prefix_extractor,
|
||||
index_block_restart_interval);
|
||||
}
|
||||
default: {
|
||||
assert(!"Do not recognize the index type ");
|
||||
@ -484,9 +490,10 @@ struct BlockBasedTableBuilder::Rep {
|
||||
data_block(table_options.block_restart_interval,
|
||||
table_options.use_delta_encoding),
|
||||
internal_prefix_transform(_ioptions.prefix_extractor),
|
||||
index_builder(CreateIndexBuilder(table_options.index_type,
|
||||
&internal_comparator,
|
||||
&this->internal_prefix_transform)),
|
||||
index_builder(
|
||||
CreateIndexBuilder(table_options.index_type, &internal_comparator,
|
||||
&this->internal_prefix_transform,
|
||||
table_options.index_block_restart_interval)),
|
||||
compression_type(_compression_type),
|
||||
compression_opts(_compression_opts),
|
||||
filter_block(skip_filters ? nullptr : CreateFilterBlockBuilder(
|
||||
|
@ -42,6 +42,9 @@ BlockBasedTableFactory::BlockBasedTableFactory(
|
||||
if (table_options_.block_restart_interval < 1) {
|
||||
table_options_.block_restart_interval = 1;
|
||||
}
|
||||
if (table_options_.index_block_restart_interval < 1) {
|
||||
table_options_.index_block_restart_interval = 1;
|
||||
}
|
||||
}
|
||||
|
||||
Status BlockBasedTableFactory::NewTableReader(
|
||||
@ -150,6 +153,9 @@ std::string BlockBasedTableFactory::GetPrintableTableOptions() const {
|
||||
snprintf(buffer, kBufferSize, " block_restart_interval: %d\n",
|
||||
table_options_.block_restart_interval);
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " index_block_restart_interval: %d\n",
|
||||
table_options_.index_block_restart_interval);
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " filter_policy: %s\n",
|
||||
table_options_.filter_policy == nullptr ?
|
||||
"nullptr" : table_options_.filter_policy->Name());
|
||||
|
@ -637,6 +637,7 @@ class HarnessTest : public testing::Test {
|
||||
new FlushBlockBySizePolicyFactory());
|
||||
table_options_.block_size = 256;
|
||||
table_options_.block_restart_interval = args.restart_interval;
|
||||
table_options_.index_block_restart_interval = args.restart_interval;
|
||||
table_options_.format_version = args.format_version;
|
||||
options_.table_factory.reset(
|
||||
new BlockBasedTableFactory(table_options_));
|
||||
@ -2282,6 +2283,67 @@ TEST_F(HarnessTest, FooterTests) {
|
||||
}
|
||||
}
|
||||
|
||||
class IndexBlockRestartIntervalTest
|
||||
: public BlockBasedTableTest,
|
||||
public ::testing::WithParamInterface<int> {
|
||||
public:
|
||||
static std::vector<int> GetRestartValues() { return {-1, 0, 1, 8, 16, 32}; }
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
|
||||
::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));
|
||||
|
||||
TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
|
||||
const int kKeysInTable = 10000;
|
||||
const int kKeySize = 100;
|
||||
const int kValSize = 500;
|
||||
|
||||
int index_block_restart_interval = GetParam();
|
||||
|
||||
Options options;
|
||||
BlockBasedTableOptions table_options;
|
||||
table_options.block_size = 64; // small block size to get big index block
|
||||
table_options.index_block_restart_interval = index_block_restart_interval;
|
||||
options.table_factory.reset(new BlockBasedTableFactory(table_options));
|
||||
|
||||
TableConstructor c(BytewiseComparator());
|
||||
static Random rnd(301);
|
||||
for (int i = 0; i < kKeysInTable; i++) {
|
||||
InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue);
|
||||
c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
|
||||
}
|
||||
|
||||
std::vector<std::string> keys;
|
||||
stl_wrappers::KVMap kvmap;
|
||||
std::unique_ptr<InternalKeyComparator> comparator(
|
||||
new InternalKeyComparator(BytewiseComparator()));
|
||||
const ImmutableCFOptions ioptions(options);
|
||||
c.Finish(options, ioptions, table_options, *comparator, &keys, &kvmap);
|
||||
auto reader = c.GetTableReader();
|
||||
|
||||
std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(ReadOptions()));
|
||||
|
||||
// Test point lookup
|
||||
for (auto& kv : kvmap) {
|
||||
db_iter->Seek(kv.first);
|
||||
|
||||
ASSERT_TRUE(db_iter->Valid());
|
||||
ASSERT_OK(db_iter->status());
|
||||
ASSERT_EQ(db_iter->key(), kv.first);
|
||||
ASSERT_EQ(db_iter->value(), kv.second);
|
||||
}
|
||||
|
||||
// Test iterating
|
||||
auto kv_iter = kvmap.begin();
|
||||
for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
|
||||
ASSERT_EQ(db_iter->key(), kv_iter->first);
|
||||
ASSERT_EQ(db_iter->value(), kv_iter->second);
|
||||
kv_iter++;
|
||||
}
|
||||
ASSERT_EQ(kv_iter, kvmap.end());
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -486,6 +486,9 @@ static std::unordered_map<std::string,
|
||||
{"block_restart_interval",
|
||||
{offsetof(struct BlockBasedTableOptions, block_restart_interval),
|
||||
OptionType::kInt, OptionVerificationType::kNormal}},
|
||||
{"index_block_restart_interval",
|
||||
{offsetof(struct BlockBasedTableOptions, index_block_restart_interval),
|
||||
OptionType::kInt, OptionVerificationType::kNormal}},
|
||||
{"filter_policy",
|
||||
{offsetof(struct BlockBasedTableOptions, filter_policy),
|
||||
OptionType::kFilterPolicy, OptionVerificationType::kByName}},
|
||||
@ -493,7 +496,7 @@ static std::unordered_map<std::string,
|
||||
{offsetof(struct BlockBasedTableOptions, whole_key_filtering),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal}},
|
||||
{"skip_table_builder_flush",
|
||||
{offsetof(struct BlockBasedTableOptions, skip_table_builder_flush),
|
||||
{offsetof(struct BlockBasedTableOptions, skip_table_builder_flush),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal}},
|
||||
{"format_version",
|
||||
{offsetof(struct BlockBasedTableOptions, format_version),
|
||||
|
@ -1575,7 +1575,8 @@ TEST_F(OptionsParserTest, BlockBasedTableOptionsAllFieldsSettable) {
|
||||
"cache_index_and_filter_blocks=1;index_type=kHashSearch;"
|
||||
"checksum=kxxHash;hash_index_allow_collision=1;no_block_cache=1;"
|
||||
"block_cache=1M;block_cache_compressed=1k;block_size=1024;"
|
||||
"block_size_deviation=8;block_restart_interval=4;"
|
||||
"block_size_deviation=8;block_restart_interval=4; "
|
||||
"index_block_restart_interval=4;"
|
||||
"filter_policy=bloomfilter:4:true;whole_key_filtering=1;"
|
||||
"skip_table_builder_flush=1;format_version=1;"
|
||||
"hash_index_allow_collision=false;",
|
||||
|
@ -200,6 +200,7 @@ BlockBasedTableOptions RandomBlockBasedTableOptions(Random* rnd) {
|
||||
opt.block_size = rnd->Uniform(10000000);
|
||||
opt.block_size_deviation = rnd->Uniform(100);
|
||||
opt.block_restart_interval = rnd->Uniform(100);
|
||||
opt.index_block_restart_interval = rnd->Uniform(100);
|
||||
opt.whole_key_filtering = rnd->Uniform(2);
|
||||
|
||||
return opt;
|
||||
|
Loading…
Reference in New Issue
Block a user