Fix regression bug of hash index with iterator total order seek (#6328)

Summary:
https://github.com/facebook/rocksdb/pull/6028 introduces a bug for hash index in SST files. If a table reader is created when total order seek is used, prefix_extractor might be passed into table reader as null. While later when prefix seek is used, the same table reader used, hash index is checked but prefix extractor is null and the program would crash.
Fix the issue by fixing http://github.com/facebook/rocksdb/pull/6028 in the way that prefix_extractor is preserved but ReadOptions.total_order_seek is checked

Also, a null pointer check is added so that a bug like this won't cause segfault in the future.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6328

Test Plan: Add a unit test that would fail without the fix. Stress test that reproduces the crash would pass.

Differential Revision: D19586751

fbshipit-source-id: 8de77690167ddf5a77a01e167cf89430b1bfba42
This commit is contained in:
sdong 2020-01-27 15:41:57 -08:00 committed by Facebook Github Bot
parent 986df37135
commit f10f135938
5 changed files with 81 additions and 12 deletions

View File

@ -18,6 +18,7 @@
* Fixed an issue where the thread pools were not resized upon setting `max_background_jobs` dynamically through the `SetDBOptions` interface.
* Fix a bug that can cause write threads to hang when a slowdown/stall happens and there is a mix of writers with WriteOptions::no_slowdown set/unset.
* Fixed an issue where an incorrect "number of input records" value was used to compute the "records dropped" statistics for compactions.
* Fix a regression bug that causes segfault when hash is used, max_open_files != -1 and total order seek is used and switched back.
### New Features
* It is now possible to enable periodic compactions for the base DB when using BlobDB.

View File

@ -3174,6 +3174,57 @@ TEST_F(DBTest, BlockBasedTablePrefixIndexTest) {
ASSERT_EQ("v2", Get("k2"));
}
TEST_F(DBTest, BlockBasedTablePrefixIndexTotalOrderSeek) {
// create a DB with block prefix index
BlockBasedTableOptions table_options;
Options options = CurrentOptions();
options.max_open_files = 10;
table_options.index_type = BlockBasedTableOptions::kHashSearch;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
// RocksDB sanitize max open files to at least 20. Modify it back.
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
int* max_open_files = static_cast<int*>(arg);
*max_open_files = 11;
});
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
Reopen(options);
ASSERT_OK(Put("k1", "v1"));
Flush();
CompactRangeOptions cro;
cro.change_level = true;
cro.target_level = 1;
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
// Force evict tables
dbfull()->TEST_table_cache()->SetCapacity(0);
// Make table cache to keep one entry.
dbfull()->TEST_table_cache()->SetCapacity(1);
ReadOptions read_options;
read_options.total_order_seek = true;
{
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
iter->Seek("k1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("k1", iter->key().ToString());
}
// After total order seek, prefix index should still be used.
read_options.total_order_seek = false;
{
std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
iter->Seek("k1");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("k1", iter->key().ToString());
}
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
TEST_F(DBTest, ChecksumTest) {
BlockBasedTableOptions table_options;
Options options = CurrentOptions();

View File

@ -867,8 +867,7 @@ class LevelIterator final : public InternalIterator {
icomparator_(icomparator),
user_comparator_(icomparator.user_comparator()),
flevel_(flevel),
prefix_extractor_(read_options.total_order_seek ? nullptr
: prefix_extractor),
prefix_extractor_(prefix_extractor),
file_read_hist_(file_read_hist),
should_sample_(should_sample),
caller_(caller),
@ -1003,6 +1002,9 @@ class LevelIterator final : public InternalIterator {
const UserComparatorWrapper user_comparator_;
const LevelFilesBrief* flevel_;
mutable FileDescriptor current_value_;
// `prefix_extractor_` may be non-null even for total order seek. Checking
// this variable is not the right way to identify whether prefix iterator
// is used.
const SliceTransform* prefix_extractor_;
HistogramImpl* file_read_hist_;
@ -1045,7 +1047,8 @@ void LevelIterator::Seek(const Slice& target) {
file_iter_.Seek(target);
}
if (SkipEmptyFileForward() && prefix_extractor_ != nullptr &&
file_iter_.iter() != nullptr && file_iter_.Valid()) {
!read_options_.total_order_seek && file_iter_.iter() != nullptr &&
file_iter_.Valid()) {
// We've skipped the file we initially positioned to. In the prefix
// seek case, it is likely that the file is skipped because of
// prefix bloom or hash, where more keys are skipped. We then check

View File

@ -718,6 +718,7 @@ class HashIndexReader : public BlockBasedTable::IndexReaderCommon {
}
BlockPrefixIndex* prefix_index = nullptr;
assert(rep->internal_prefix_transform.get() != nullptr);
s = BlockPrefixIndex::Create(rep->internal_prefix_transform.get(),
prefixes_contents.data,
prefixes_meta_contents.data, &prefix_index);
@ -1187,8 +1188,10 @@ Status BlockBasedTable::Open(
rep->hash_index_allow_collision = table_options.hash_index_allow_collision;
// We need to wrap data with internal_prefix_transform to make sure it can
// handle prefix correctly.
if (prefix_extractor != nullptr) {
rep->internal_prefix_transform.reset(
new InternalKeySliceTransform(prefix_extractor));
}
SetupCacheKeyPrefix(rep);
std::unique_ptr<BlockBasedTable> new_table(
new BlockBasedTable(rep, block_cache_tracer));
@ -4049,7 +4052,13 @@ Status BlockBasedTable::CreateIndexReader(
std::unique_ptr<Block> metaindex_guard;
std::unique_ptr<InternalIterator> metaindex_iter_guard;
auto meta_index_iter = preloaded_meta_index_iter;
if (meta_index_iter == nullptr) {
bool should_fallback = false;
if (rep_->internal_prefix_transform.get() == nullptr) {
ROCKS_LOG_WARN(rep_->ioptions.info_log,
"No prefix extractor passed in. Fall back to binary"
" search index.");
should_fallback = true;
} else if (meta_index_iter == nullptr) {
auto s = ReadMetaIndexBlock(prefetch_buffer, &metaindex_guard,
&metaindex_iter_guard);
if (!s.ok()) {
@ -4058,17 +4067,21 @@ Status BlockBasedTable::CreateIndexReader(
ROCKS_LOG_WARN(rep_->ioptions.info_log,
"Unable to read the metaindex block."
" Fall back to binary search index.");
return BinarySearchIndexReader::Create(this, prefetch_buffer,
use_cache, prefetch, pin,
lookup_context, index_reader);
should_fallback = true;
}
meta_index_iter = metaindex_iter_guard.get();
}
if (should_fallback) {
return BinarySearchIndexReader::Create(this, prefetch_buffer, use_cache,
prefetch, pin, lookup_context,
index_reader);
} else {
return HashIndexReader::Create(this, prefetch_buffer, meta_index_iter,
use_cache, prefetch, pin, lookup_context,
index_reader);
}
}
default: {
std::string error_message =
"Unrecognized index type: " + ToString(rep_->index_type);

View File

@ -548,6 +548,7 @@ struct BlockBasedTable::Rep {
// module should not be relying on db module. However to make things easier
// and compatible with existing code, we introduce a wrapper that allows
// block to extract prefix without knowing if a key is internal or not.
// null if no prefix_extractor is passed in when opening the table reader.
std::unique_ptr<SliceTransform> internal_prefix_transform;
std::shared_ptr<const SliceTransform> table_prefix_extractor;