Remove cuckoo hash memtable (#4953)
Summary: Cuckoo Hash is less useful than we initially expected. Remove it. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4953 Differential Revision: D13979264 Pulled By: siying fbshipit-source-id: 2a60afdaa989f045357398b43a1cc5d46f4492ed
This commit is contained in:
parent
199fabc197
commit
cf3a671733
@ -530,7 +530,6 @@ set(SOURCES
|
||||
env/env_hdfs.cc
|
||||
env/mock_env.cc
|
||||
memtable/alloc_tracker.cc
|
||||
memtable/hash_cuckoo_rep.cc
|
||||
memtable/hash_linklist_rep.cc
|
||||
memtable/hash_skiplist_rep.cc
|
||||
memtable/skiplistrep.cc
|
||||
|
@ -17,6 +17,7 @@
|
||||
* Change time resolution in FileOperationInfo.
|
||||
* Deletion of Blob files also goes through SstFileManager.
|
||||
* Remove PlainTable's store_index_in_file feature. When opening an existing DB with index in SST files, the index and bloom filter will still be rebuilt while SST files are opened, in the same way as when there is no index in the file.
|
||||
* Remove CuckooHash memtable.
|
||||
|
||||
### Bug Fixes
|
||||
* Fix a deadlock caused by compaction and file ingestion waiting for each other in the event of write stalls.
|
||||
|
1
TARGETS
1
TARGETS
@ -142,7 +142,6 @@ cpp_library(
|
||||
"env/io_posix.cc",
|
||||
"env/mock_env.cc",
|
||||
"memtable/alloc_tracker.cc",
|
||||
"memtable/hash_cuckoo_rep.cc",
|
||||
"memtable/hash_linklist_rep.cc",
|
||||
"memtable/hash_skiplist_rep.cc",
|
||||
"memtable/skiplistrep.cc",
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include "db/db_impl.h"
|
||||
#include "db/db_test_util.h"
|
||||
#include "memtable/hash_skiplist_rep.h"
|
||||
#include "options/options_parser.h"
|
||||
#include "port/port.h"
|
||||
#include "rocksdb/db.h"
|
||||
@ -1206,29 +1207,32 @@ TEST_P(ColumnFamilyTest, DifferentWriteBufferSizes) {
|
||||
}
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
||||
#ifndef ROCKSDB_LITE // Cuckoo is not supported in lite
|
||||
TEST_P(ColumnFamilyTest, MemtableNotSupportSnapshot) {
|
||||
db_options_.allow_concurrent_memtable_write = false;
|
||||
Open();
|
||||
auto* s1 = dbfull()->GetSnapshot();
|
||||
ASSERT_TRUE(s1 != nullptr);
|
||||
dbfull()->ReleaseSnapshot(s1);
|
||||
// The test is commented out because we want to test that snapshot is
|
||||
// not created for memtables that do not support it, but there isn't a memtable
|
||||
// that doesn't support snapshot right now. If we have one later, we can
|
||||
// re-enable the test.
|
||||
//
|
||||
// #ifndef ROCKSDB_LITE // Cuckoo is not supported in lite
|
||||
// TEST_P(ColumnFamilyTest, MemtableNotSupportSnapshot) {
|
||||
// db_options_.allow_concurrent_memtable_write = false;
|
||||
// Open();
|
||||
// auto* s1 = dbfull()->GetSnapshot();
|
||||
// ASSERT_TRUE(s1 != nullptr);
|
||||
// dbfull()->ReleaseSnapshot(s1);
|
||||
|
||||
// Add a column family that doesn't support snapshot
|
||||
ColumnFamilyOptions first;
|
||||
first.memtable_factory.reset(NewHashCuckooRepFactory(1024 * 1024));
|
||||
CreateColumnFamilies({"first"}, {first});
|
||||
auto* s2 = dbfull()->GetSnapshot();
|
||||
ASSERT_TRUE(s2 == nullptr);
|
||||
// // Add a column family that doesn't support snapshot
|
||||
// ColumnFamilyOptions first;
|
||||
// first.memtable_factory.reset(new DummyMemtableNotSupportingSnapshot());
|
||||
// CreateColumnFamilies({"first"}, {first});
|
||||
// auto* s2 = dbfull()->GetSnapshot();
|
||||
// ASSERT_TRUE(s2 == nullptr);
|
||||
|
||||
// Add a column family that supports snapshot. Snapshot stays not supported.
|
||||
ColumnFamilyOptions second;
|
||||
CreateColumnFamilies({"second"}, {second});
|
||||
auto* s3 = dbfull()->GetSnapshot();
|
||||
ASSERT_TRUE(s3 == nullptr);
|
||||
Close();
|
||||
}
|
||||
#endif // !ROCKSDB_LITE
|
||||
// // Add a column family that supports snapshot. Snapshot stays not
|
||||
// supported. ColumnFamilyOptions second; CreateColumnFamilies({"second"},
|
||||
// {second}); auto* s3 = dbfull()->GetSnapshot(); ASSERT_TRUE(s3 == nullptr);
|
||||
// Close();
|
||||
// }
|
||||
// #endif // !ROCKSDB_LITE
|
||||
|
||||
class TestComparator : public Comparator {
|
||||
int Compare(const rocksdb::Slice& /*a*/,
|
||||
|
@ -215,11 +215,11 @@ TEST_F(DBBasicTest, PutSingleDeleteGet) {
|
||||
ASSERT_EQ("v2", Get(1, "foo2"));
|
||||
ASSERT_OK(SingleDelete(1, "foo"));
|
||||
ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
|
||||
// Skip HashCuckooRep as it does not support single delete. FIFO and
|
||||
// universal compaction do not apply to the test case. Skip MergePut
|
||||
// because single delete does not get removed when it encounters a merge.
|
||||
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
|
||||
kSkipUniversalCompaction | kSkipMergePut));
|
||||
// Skip FIFO and universal compaction because they do not apply to the test
|
||||
// case. Skip MergePut because single delete does not get removed when it
|
||||
// encounters a merge.
|
||||
} while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
|
||||
kSkipMergePut));
|
||||
}
|
||||
|
||||
TEST_F(DBBasicTest, EmptyFlush) {
|
||||
@ -237,11 +237,11 @@ TEST_F(DBBasicTest, EmptyFlush) {
|
||||
ASSERT_OK(Flush(1));
|
||||
|
||||
ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
|
||||
// Skip HashCuckooRep as it does not support single delete. FIFO and
|
||||
// universal compaction do not apply to the test case. Skip MergePut
|
||||
// because merges cannot be combined with single deletions.
|
||||
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
|
||||
kSkipUniversalCompaction | kSkipMergePut));
|
||||
// Skip FIFO and universal compaction as they do not apply to the test
|
||||
// case. Skip MergePut because merges cannot be combined with single
|
||||
// deletions.
|
||||
} while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
|
||||
kSkipMergePut));
|
||||
}
|
||||
|
||||
TEST_F(DBBasicTest, GetFromVersions) {
|
||||
@ -265,11 +265,6 @@ TEST_F(DBBasicTest, GetSnapshot) {
|
||||
std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
|
||||
ASSERT_OK(Put(1, key, "v1"));
|
||||
const Snapshot* s1 = db_->GetSnapshot();
|
||||
if (option_config_ == kHashCuckoo) {
|
||||
// Unsupported case.
|
||||
ASSERT_TRUE(s1 == nullptr);
|
||||
break;
|
||||
}
|
||||
ASSERT_OK(Put(1, key, "v2"));
|
||||
ASSERT_EQ("v2", Get(1, key));
|
||||
ASSERT_EQ("v1", Get(1, key, s1));
|
||||
@ -510,7 +505,7 @@ TEST_F(DBBasicTest, Snapshot) {
|
||||
ASSERT_EQ(0U, GetNumSnapshots());
|
||||
ASSERT_EQ("0v4", Get(0, "foo"));
|
||||
ASSERT_EQ("1v4", Get(1, "foo"));
|
||||
} while (ChangeOptions(kSkipHashCuckoo));
|
||||
} while (ChangeOptions());
|
||||
}
|
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
@ -566,8 +561,7 @@ TEST_F(DBBasicTest, CompactBetweenSnapshots) {
|
||||
nullptr);
|
||||
ASSERT_EQ("sixth", Get(1, "foo"));
|
||||
ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth ]");
|
||||
// skip HashCuckooRep as it does not support snapshot
|
||||
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction));
|
||||
} while (ChangeOptions(kSkipFIFOCompaction));
|
||||
}
|
||||
|
||||
TEST_F(DBBasicTest, DBOpen_Options) {
|
||||
|
@ -179,9 +179,7 @@ TEST_P(DBIteratorTest, NonBlockingIteration) {
|
||||
|
||||
// This test verifies block cache behaviors, which is not used by plain
|
||||
// table format.
|
||||
// Exclude kHashCuckoo as it does not support iteration currently
|
||||
} while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipHashCuckoo |
|
||||
kSkipMmapReads));
|
||||
} while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
|
||||
}
|
||||
|
||||
TEST_P(DBIteratorTest, IterSeekBeforePrev) {
|
||||
@ -765,8 +763,7 @@ TEST_P(DBIteratorTest, IterWithSnapshot) {
|
||||
}
|
||||
db_->ReleaseSnapshot(snapshot);
|
||||
delete iter;
|
||||
// skip as HashCuckooRep does not support snapshot
|
||||
} while (ChangeOptions(kSkipHashCuckoo));
|
||||
} while (ChangeOptions());
|
||||
}
|
||||
|
||||
TEST_P(DBIteratorTest, IteratorPinsRef) {
|
||||
|
@ -332,8 +332,7 @@ TEST_P(MergeOperatorPinningTest, Randomized) {
|
||||
|
||||
VerifyDBFromMap(true_data);
|
||||
|
||||
// Skip HashCuckoo since it does not support merge operators
|
||||
} while (ChangeOptions(kSkipMergePut | kSkipHashCuckoo));
|
||||
} while (ChangeOptions(kSkipMergePut));
|
||||
}
|
||||
|
||||
class MergeOperatorHook : public MergeOperator {
|
||||
|
@ -72,8 +72,8 @@ TEST_F(DBRangeDelTest, CompactionOutputHasOnlyRangeTombstone) {
|
||||
// Skip cuckoo memtables, which do not support snapshots. Skip non-leveled
|
||||
// compactions as the above assertions about the number of files in a level
|
||||
// do not hold true.
|
||||
} while (ChangeOptions(kRangeDelSkipConfigs | kSkipHashCuckoo |
|
||||
kSkipUniversalCompaction | kSkipFIFOCompaction));
|
||||
} while (ChangeOptions(kRangeDelSkipConfigs | kSkipUniversalCompaction |
|
||||
kSkipFIFOCompaction));
|
||||
}
|
||||
|
||||
TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
|
||||
@ -645,8 +645,7 @@ TEST_F(DBRangeDelTest, GetCoveredKeyFromSst) {
|
||||
std::string value;
|
||||
ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
|
||||
db_->ReleaseSnapshot(snapshot);
|
||||
// Cuckoo memtables do not support snapshots.
|
||||
} while (ChangeOptions(kRangeDelSkipConfigs | kSkipHashCuckoo));
|
||||
} while (ChangeOptions(kRangeDelSkipConfigs));
|
||||
}
|
||||
|
||||
TEST_F(DBRangeDelTest, GetCoveredMergeOperandFromMemtable) {
|
||||
|
@ -487,11 +487,11 @@ TEST_F(DBTest, PutSingleDeleteGet) {
|
||||
ASSERT_EQ("v2", Get(1, "foo2"));
|
||||
ASSERT_OK(SingleDelete(1, "foo"));
|
||||
ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
|
||||
// Skip HashCuckooRep as it does not support single delete. FIFO and
|
||||
// universal compaction do not apply to the test case. Skip MergePut
|
||||
// because single delete does not get removed when it encounters a merge.
|
||||
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
|
||||
kSkipUniversalCompaction | kSkipMergePut));
|
||||
// Skip FIFO and universal compaction because they do not apply to the test
|
||||
// case. Skip MergePut because single delete does not get removed when it
|
||||
// encounters a merge.
|
||||
} while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
|
||||
kSkipMergePut));
|
||||
}
|
||||
|
||||
TEST_F(DBTest, ReadFromPersistedTier) {
|
||||
@ -604,7 +604,7 @@ TEST_F(DBTest, ReadFromPersistedTier) {
|
||||
DestroyAndReopen(options);
|
||||
}
|
||||
}
|
||||
} while (ChangeOptions(kSkipHashCuckoo));
|
||||
} while (ChangeOptions());
|
||||
}
|
||||
|
||||
TEST_F(DBTest, SingleDeleteFlush) {
|
||||
@ -640,11 +640,11 @@ TEST_F(DBTest, SingleDeleteFlush) {
|
||||
|
||||
ASSERT_EQ("NOT_FOUND", Get(1, "bar"));
|
||||
ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
|
||||
// Skip HashCuckooRep as it does not support single delete. FIFO and
|
||||
// universal compaction do not apply to the test case. Skip MergePut
|
||||
// because merges cannot be combined with single deletions.
|
||||
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
|
||||
kSkipUniversalCompaction | kSkipMergePut));
|
||||
// Skip FIFO and universal compaction because they do not apply to the test
|
||||
// case. Skip MergePut because single delete does not get removed when it
|
||||
// encounters a merge.
|
||||
} while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
|
||||
kSkipMergePut));
|
||||
}
|
||||
|
||||
TEST_F(DBTest, SingleDeletePutFlush) {
|
||||
@ -663,11 +663,11 @@ TEST_F(DBTest, SingleDeletePutFlush) {
|
||||
ASSERT_OK(Flush(1));
|
||||
|
||||
ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
|
||||
// Skip HashCuckooRep as it does not support single delete. FIFO and
|
||||
// universal compaction do not apply to the test case. Skip MergePut
|
||||
// because merges cannot be combined with single deletions.
|
||||
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
|
||||
kSkipUniversalCompaction | kSkipMergePut));
|
||||
// Skip FIFO and universal compaction because they do not apply to the test
|
||||
// case. Skip MergePut because single delete does not get removed when it
|
||||
// encounters a merge.
|
||||
} while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
|
||||
kSkipMergePut));
|
||||
}
|
||||
|
||||
// Disable because not all platform can run it.
|
||||
@ -1569,7 +1569,7 @@ TEST_F(DBTest, Snapshot) {
|
||||
ASSERT_EQ(0U, GetNumSnapshots());
|
||||
ASSERT_EQ("0v4", Get(0, "foo"));
|
||||
ASSERT_EQ("1v4", Get(1, "foo"));
|
||||
} while (ChangeOptions(kSkipHashCuckoo));
|
||||
} while (ChangeOptions());
|
||||
}
|
||||
|
||||
TEST_F(DBTest, HiddenValuesAreRemoved) {
|
||||
@ -1606,9 +1606,8 @@ TEST_F(DBTest, HiddenValuesAreRemoved) {
|
||||
ASSERT_TRUE(Between(Size("", "pastfoo", 1), 0, 1000));
|
||||
// ApproximateOffsetOf() is not yet implemented in plain table format,
|
||||
// which is used by Size().
|
||||
// skip HashCuckooRep as it does not support snapshot
|
||||
} while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
|
||||
kSkipPlainTable | kSkipHashCuckoo));
|
||||
kSkipPlainTable));
|
||||
}
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
@ -1654,11 +1653,11 @@ TEST_F(DBTest, UnremovableSingleDelete) {
|
||||
ASSERT_EQ("first", Get(1, "foo", snapshot));
|
||||
ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
|
||||
db_->ReleaseSnapshot(snapshot);
|
||||
// Skip HashCuckooRep as it does not support single delete. FIFO and
|
||||
// universal compaction do not apply to the test case. Skip MergePut
|
||||
// because single delete does not get removed when it encounters a merge.
|
||||
} while (ChangeOptions(kSkipHashCuckoo | kSkipFIFOCompaction |
|
||||
kSkipUniversalCompaction | kSkipMergePut));
|
||||
// Skip FIFO and universal compaction because they do not apply to the test
|
||||
// case. Skip MergePut because single delete does not get removed when it
|
||||
// encounters a merge.
|
||||
} while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction |
|
||||
kSkipMergePut));
|
||||
}
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
@ -2259,10 +2258,7 @@ class MultiThreadedDBTest : public DBTest,
|
||||
static std::vector<int> GenerateOptionConfigs() {
|
||||
std::vector<int> optionConfigs;
|
||||
for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) {
|
||||
// skip as HashCuckooRep does not support snapshot
|
||||
if (optionConfig != kHashCuckoo) {
|
||||
optionConfigs.push_back(optionConfig);
|
||||
}
|
||||
optionConfigs.push_back(optionConfig);
|
||||
}
|
||||
return optionConfigs;
|
||||
}
|
||||
@ -2825,9 +2821,8 @@ class DBTestRandomized : public DBTest,
|
||||
std::vector<int> option_configs;
|
||||
// skip cuckoo hash as it does not support snapshot.
|
||||
for (int option_config = kDefault; option_config < kEnd; ++option_config) {
|
||||
if (!ShouldSkipOptions(option_config, kSkipDeletesFilterFirst |
|
||||
kSkipNoSeekToLast |
|
||||
kSkipHashCuckoo)) {
|
||||
if (!ShouldSkipOptions(option_config,
|
||||
kSkipDeletesFilterFirst | kSkipNoSeekToLast)) {
|
||||
option_configs.push_back(option_config);
|
||||
}
|
||||
}
|
||||
@ -2857,7 +2852,6 @@ TEST_P(DBTestRandomized, Randomized) {
|
||||
int p = rnd.Uniform(100);
|
||||
int minimum = 0;
|
||||
if (option_config_ == kHashSkipList || option_config_ == kHashLinkList ||
|
||||
option_config_ == kHashCuckoo ||
|
||||
option_config_ == kPlainTableFirstBytePrefix ||
|
||||
option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
|
||||
option_config_ == kBlockBasedTableWithPrefixHashIndex) {
|
||||
@ -3137,10 +3131,6 @@ TEST_F(DBTest, FIFOCompactionWithTTLAndVariousTableFormatsTest) {
|
||||
options.table_factory.reset(NewPlainTableFactory());
|
||||
ASSERT_TRUE(TryReopen(options).IsNotSupported());
|
||||
|
||||
Destroy(options);
|
||||
options.table_factory.reset(NewCuckooTableFactory());
|
||||
ASSERT_TRUE(TryReopen(options).IsNotSupported());
|
||||
|
||||
Destroy(options);
|
||||
options.table_factory.reset(NewAdaptiveTableFactory());
|
||||
ASSERT_TRUE(TryReopen(options).IsNotSupported());
|
||||
|
@ -101,18 +101,18 @@ DBTestBase::~DBTestBase() {
|
||||
bool DBTestBase::ShouldSkipOptions(int option_config, int skip_mask) {
|
||||
#ifdef ROCKSDB_LITE
|
||||
// These options are not supported in ROCKSDB_LITE
|
||||
if (option_config == kHashSkipList ||
|
||||
option_config == kPlainTableFirstBytePrefix ||
|
||||
option_config == kPlainTableCappedPrefix ||
|
||||
option_config == kPlainTableCappedPrefixNonMmap ||
|
||||
option_config == kPlainTableAllBytesPrefix ||
|
||||
option_config == kVectorRep || option_config == kHashLinkList ||
|
||||
option_config == kHashCuckoo || option_config == kUniversalCompaction ||
|
||||
option_config == kUniversalCompactionMultiLevel ||
|
||||
option_config == kUniversalSubcompactions ||
|
||||
option_config == kFIFOCompaction ||
|
||||
option_config == kConcurrentSkipList) {
|
||||
return true;
|
||||
if (option_config == kHashSkipList ||
|
||||
option_config == kPlainTableFirstBytePrefix ||
|
||||
option_config == kPlainTableCappedPrefix ||
|
||||
option_config == kPlainTableCappedPrefixNonMmap ||
|
||||
option_config == kPlainTableAllBytesPrefix ||
|
||||
option_config == kVectorRep || option_config == kHashLinkList ||
|
||||
option_config == kUniversalCompaction ||
|
||||
option_config == kUniversalCompactionMultiLevel ||
|
||||
option_config == kUniversalSubcompactions ||
|
||||
option_config == kFIFOCompaction ||
|
||||
option_config == kConcurrentSkipList) {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -141,9 +141,6 @@ bool DBTestBase::ShouldSkipOptions(int option_config, int skip_mask) {
|
||||
option_config == kBlockBasedTableWithWholeKeyHashIndex)) {
|
||||
return true;
|
||||
}
|
||||
if ((skip_mask & kSkipHashCuckoo) && (option_config == kHashCuckoo)) {
|
||||
return true;
|
||||
}
|
||||
if ((skip_mask & kSkipFIFOCompaction) && option_config == kFIFOCompaction) {
|
||||
return true;
|
||||
}
|
||||
@ -383,11 +380,6 @@ Options DBTestBase::GetOptions(
|
||||
NewHashLinkListRepFactory(4, 0, 3, true, 4));
|
||||
options.allow_concurrent_memtable_write = false;
|
||||
break;
|
||||
case kHashCuckoo:
|
||||
options.memtable_factory.reset(
|
||||
NewHashCuckooRepFactory(options.write_buffer_size));
|
||||
options.allow_concurrent_memtable_write = false;
|
||||
break;
|
||||
case kDirectIO: {
|
||||
options.use_direct_reads = true;
|
||||
options.use_direct_io_for_flush_and_compaction = true;
|
||||
|
@ -652,29 +652,28 @@ class DBTestBase : public testing::Test {
|
||||
kPlainTableAllBytesPrefix = 6,
|
||||
kVectorRep = 7,
|
||||
kHashLinkList = 8,
|
||||
kHashCuckoo = 9,
|
||||
kMergePut = 10,
|
||||
kFilter = 11,
|
||||
kFullFilterWithNewTableReaderForCompactions = 12,
|
||||
kUncompressed = 13,
|
||||
kNumLevel_3 = 14,
|
||||
kDBLogDir = 15,
|
||||
kWalDirAndMmapReads = 16,
|
||||
kManifestFileSize = 17,
|
||||
kPerfOptions = 18,
|
||||
kHashSkipList = 19,
|
||||
kUniversalCompaction = 20,
|
||||
kUniversalCompactionMultiLevel = 21,
|
||||
kCompressedBlockCache = 22,
|
||||
kInfiniteMaxOpenFiles = 23,
|
||||
kxxHashChecksum = 24,
|
||||
kFIFOCompaction = 25,
|
||||
kOptimizeFiltersForHits = 26,
|
||||
kRowCache = 27,
|
||||
kRecycleLogFiles = 28,
|
||||
kConcurrentSkipList = 29,
|
||||
kPipelinedWrite = 30,
|
||||
kConcurrentWALWrites = 31,
|
||||
kMergePut = 9,
|
||||
kFilter = 10,
|
||||
kFullFilterWithNewTableReaderForCompactions = 11,
|
||||
kUncompressed = 12,
|
||||
kNumLevel_3 = 13,
|
||||
kDBLogDir = 14,
|
||||
kWalDirAndMmapReads = 15,
|
||||
kManifestFileSize = 16,
|
||||
kPerfOptions = 17,
|
||||
kHashSkipList = 18,
|
||||
kUniversalCompaction = 19,
|
||||
kUniversalCompactionMultiLevel = 20,
|
||||
kCompressedBlockCache = 21,
|
||||
kInfiniteMaxOpenFiles = 22,
|
||||
kxxHashChecksum = 23,
|
||||
kFIFOCompaction = 24,
|
||||
kOptimizeFiltersForHits = 25,
|
||||
kRowCache = 26,
|
||||
kRecycleLogFiles = 27,
|
||||
kConcurrentSkipList = 28,
|
||||
kPipelinedWrite = 29,
|
||||
kConcurrentWALWrites = 30,
|
||||
kDirectIO,
|
||||
kLevelSubcompactions,
|
||||
kBlockBasedTableWithIndexRestartInterval,
|
||||
@ -710,7 +709,6 @@ class DBTestBase : public testing::Test {
|
||||
kSkipPlainTable = 8,
|
||||
kSkipHashIndex = 16,
|
||||
kSkipNoSeekToLast = 32,
|
||||
kSkipHashCuckoo = 64,
|
||||
kSkipFIFOCompaction = 128,
|
||||
kSkipMmapReads = 256,
|
||||
};
|
||||
|
@ -362,39 +362,5 @@ extern MemTableRepFactory* NewHashLinkListRepFactory(
|
||||
bool if_log_bucket_dist_when_flash = true,
|
||||
uint32_t threshold_use_skiplist = 256);
|
||||
|
||||
// This factory creates a cuckoo-hashing based mem-table representation.
|
||||
// Cuckoo-hash is a closed-hash strategy, in which all key/value pairs
|
||||
// are stored in the bucket array itself instead of in some data structures
|
||||
// external to the bucket array. In addition, each key in cuckoo hash
|
||||
// has a constant number of possible buckets in the bucket array. These
|
||||
// two properties together make cuckoo hash more memory efficient and give it
|
||||
// a constant worst-case read time. Cuckoo hash is best suited for
|
||||
// point-lookup workload.
|
||||
//
|
||||
// When inserting a key / value, it first checks whether one of its possible
|
||||
// buckets is empty. If so, the key / value will be inserted to that vacant
|
||||
// bucket. Otherwise, one of the keys originally stored in one of these
|
||||
// possible buckets will be "kicked out" and move to one of its possible
|
||||
// buckets (and possibly kicks out another victim.) In the current
|
||||
// implementation, such "kick-out" path is bounded. If it cannot find a
|
||||
// "kick-out" path for a specific key, this key will be stored in a backup
|
||||
// structure, and the current memtable will be forced to become immutable.
|
||||
//
|
||||
// Note that currently this mem-table representation does not support
|
||||
// snapshot (i.e., it only queries latest state) and iterators. In addition,
|
||||
// MultiGet operation might also lose its atomicity due to the lack of
|
||||
// snapshot support.
|
||||
//
|
||||
// Parameters:
|
||||
// write_buffer_size: the write buffer size in bytes.
|
||||
// average_data_size: the average size of key + value in bytes. This value
|
||||
// together with write_buffer_size will be used to compute the number
|
||||
// of buckets.
|
||||
// hash_function_count: the number of hash functions that will be used by
|
||||
// the cuckoo-hash. The number also equals the number of possible
|
||||
// buckets each key will have.
|
||||
extern MemTableRepFactory* NewHashCuckooRepFactory(
|
||||
size_t write_buffer_size, size_t average_data_size = 64,
|
||||
unsigned int hash_function_count = 4);
|
||||
#endif // ROCKSDB_LITE
|
||||
} // namespace rocksdb
|
||||
|
@ -1,661 +0,0 @@
|
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
//
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
#include "memtable/hash_cuckoo_rep.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "db/memtable.h"
|
||||
#include "memtable/skiplist.h"
|
||||
#include "memtable/stl_wrappers.h"
|
||||
#include "port/port.h"
|
||||
#include "rocksdb/memtablerep.h"
|
||||
#include "util/murmurhash.h"
|
||||
|
||||
namespace rocksdb {
|
||||
namespace {
|
||||
|
||||
// the default maximum size of the cuckoo path searching queue
|
||||
static const int kCuckooPathMaxSearchSteps = 100;
|
||||
|
||||
struct CuckooStep {
|
||||
static const int kNullStep = -1;
|
||||
// the bucket id in the cuckoo array.
|
||||
int bucket_id_;
|
||||
// index of cuckoo-step array that points to its previous step,
|
||||
// -1 if it is the beginning step.
|
||||
int prev_step_id_;
|
||||
// the depth of the current step.
|
||||
unsigned int depth_;
|
||||
|
||||
CuckooStep() : bucket_id_(-1), prev_step_id_(kNullStep), depth_(1) {}
|
||||
|
||||
CuckooStep(CuckooStep&& o) = default;
|
||||
|
||||
CuckooStep& operator=(CuckooStep&& rhs) {
|
||||
bucket_id_ = std::move(rhs.bucket_id_);
|
||||
prev_step_id_ = std::move(rhs.prev_step_id_);
|
||||
depth_ = std::move(rhs.depth_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
CuckooStep(const CuckooStep&) = delete;
|
||||
CuckooStep& operator=(const CuckooStep&) = delete;
|
||||
|
||||
CuckooStep(int bucket_id, int prev_step_id, int depth)
|
||||
: bucket_id_(bucket_id), prev_step_id_(prev_step_id), depth_(depth) {}
|
||||
};
|
||||
|
||||
class HashCuckooRep : public MemTableRep {
|
||||
public:
|
||||
explicit HashCuckooRep(const MemTableRep::KeyComparator& compare,
|
||||
Allocator* allocator, const size_t bucket_count,
|
||||
const unsigned int hash_func_count,
|
||||
const size_t approximate_entry_size)
|
||||
: MemTableRep(allocator),
|
||||
compare_(compare),
|
||||
allocator_(allocator),
|
||||
bucket_count_(bucket_count),
|
||||
approximate_entry_size_(approximate_entry_size),
|
||||
cuckoo_path_max_depth_(kDefaultCuckooPathMaxDepth),
|
||||
occupied_count_(0),
|
||||
hash_function_count_(hash_func_count),
|
||||
backup_table_(nullptr) {
|
||||
char* mem = reinterpret_cast<char*>(
|
||||
allocator_->Allocate(sizeof(std::atomic<const char*>) * bucket_count_));
|
||||
cuckoo_array_ = new (mem) std::atomic<char*>[bucket_count_];
|
||||
for (unsigned int bid = 0; bid < bucket_count_; ++bid) {
|
||||
cuckoo_array_[bid].store(nullptr, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
cuckoo_path_ = reinterpret_cast<int*>(
|
||||
allocator_->Allocate(sizeof(int) * (cuckoo_path_max_depth_ + 1)));
|
||||
is_nearly_full_ = false;
|
||||
}
|
||||
|
||||
// return false, indicating HashCuckooRep does not support merge operator.
|
||||
virtual bool IsMergeOperatorSupported() const override { return false; }
|
||||
|
||||
// return false, indicating HashCuckooRep does not support snapshot.
|
||||
virtual bool IsSnapshotSupported() const override { return false; }
|
||||
|
||||
// Returns true iff an entry that compares equal to key is in the collection.
|
||||
virtual bool Contains(const char* internal_key) const override;
|
||||
|
||||
virtual ~HashCuckooRep() override {}
|
||||
|
||||
// Insert the specified key (internal_key) into the mem-table. Assertion
|
||||
// fails if
|
||||
// the current mem-table already contains the specified key.
|
||||
virtual void Insert(KeyHandle handle) override;
|
||||
|
||||
// This function returns bucket_count_ * approximate_entry_size_ when any
|
||||
// of the following happens, in order to disallow further write operations:
|
||||
// 1. when the fullness reaches kMaxFullnes.
|
||||
// 2. when the backup_table_ is used.
|
||||
//
|
||||
// otherwise, this function will always return 0.
|
||||
virtual size_t ApproximateMemoryUsage() override {
|
||||
if (is_nearly_full_) {
|
||||
return bucket_count_ * approximate_entry_size_;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual void Get(const LookupKey& k, void* callback_args,
|
||||
bool (*callback_func)(void* arg,
|
||||
const char* entry)) override;
|
||||
|
||||
class Iterator : public MemTableRep::Iterator {
|
||||
std::shared_ptr<std::vector<const char*>> bucket_;
|
||||
std::vector<const char*>::const_iterator mutable cit_;
|
||||
const KeyComparator& compare_;
|
||||
std::string tmp_; // For passing to EncodeKey
|
||||
bool mutable sorted_;
|
||||
void DoSort() const;
|
||||
|
||||
public:
|
||||
explicit Iterator(std::shared_ptr<std::vector<const char*>> bucket,
|
||||
const KeyComparator& compare);
|
||||
|
||||
// Initialize an iterator over the specified collection.
|
||||
// The returned iterator is not valid.
|
||||
// explicit Iterator(const MemTableRep* collection);
|
||||
virtual ~Iterator() override{};
|
||||
|
||||
// Returns true iff the iterator is positioned at a valid node.
|
||||
virtual bool Valid() const override;
|
||||
|
||||
// Returns the key at the current position.
|
||||
// REQUIRES: Valid()
|
||||
virtual const char* key() const override;
|
||||
|
||||
// Advances to the next position.
|
||||
// REQUIRES: Valid()
|
||||
virtual void Next() override;
|
||||
|
||||
// Advances to the previous position.
|
||||
// REQUIRES: Valid()
|
||||
virtual void Prev() override;
|
||||
|
||||
// Advance to the first entry with a key >= target
|
||||
virtual void Seek(const Slice& user_key, const char* memtable_key) override;
|
||||
|
||||
// Retreat to the last entry with a key <= target
|
||||
virtual void SeekForPrev(const Slice& user_key,
|
||||
const char* memtable_key) override;
|
||||
|
||||
// Position at the first entry in collection.
|
||||
// Final state of iterator is Valid() iff collection is not empty.
|
||||
virtual void SeekToFirst() override;
|
||||
|
||||
// Position at the last entry in collection.
|
||||
// Final state of iterator is Valid() iff collection is not empty.
|
||||
virtual void SeekToLast() override;
|
||||
};
|
||||
|
||||
struct CuckooStepBuffer {
|
||||
CuckooStepBuffer() : write_index_(0), read_index_(0) {}
|
||||
~CuckooStepBuffer() {}
|
||||
|
||||
int write_index_;
|
||||
int read_index_;
|
||||
CuckooStep steps_[kCuckooPathMaxSearchSteps];
|
||||
|
||||
CuckooStep& NextWriteBuffer() { return steps_[write_index_++]; }
|
||||
|
||||
inline const CuckooStep& ReadNext() { return steps_[read_index_++]; }
|
||||
|
||||
inline bool HasNewWrite() { return write_index_ > read_index_; }
|
||||
|
||||
inline void reset() {
|
||||
write_index_ = 0;
|
||||
read_index_ = 0;
|
||||
}
|
||||
|
||||
inline bool IsFull() { return write_index_ >= kCuckooPathMaxSearchSteps; }
|
||||
|
||||
// returns the number of steps that have been read
|
||||
inline int ReadCount() { return read_index_; }
|
||||
|
||||
// returns the number of steps that have been written to the buffer.
|
||||
inline int WriteCount() { return write_index_; }
|
||||
};
|
||||
|
||||
private:
|
||||
const MemTableRep::KeyComparator& compare_;
|
||||
// the pointer to Allocator to allocate memory, immutable after construction.
|
||||
Allocator* const allocator_;
|
||||
// the number of hash bucket in the hash table.
|
||||
const size_t bucket_count_;
|
||||
// approximate size of each entry
|
||||
const size_t approximate_entry_size_;
|
||||
// the maximum depth of the cuckoo path.
|
||||
const unsigned int cuckoo_path_max_depth_;
|
||||
// the current number of entries in cuckoo_array_ which has been occupied.
|
||||
size_t occupied_count_;
|
||||
// the current number of hash functions used in the cuckoo hash.
|
||||
unsigned int hash_function_count_;
|
||||
// the backup MemTableRep to handle the case where cuckoo hash cannot find
|
||||
// a vacant bucket for inserting the key of a put request.
|
||||
std::shared_ptr<MemTableRep> backup_table_;
|
||||
// the array to store pointers, pointing to the actual data.
|
||||
std::atomic<char*>* cuckoo_array_;
|
||||
// a buffer to store cuckoo path
|
||||
int* cuckoo_path_;
|
||||
// a boolean flag indicating whether the fullness of bucket array
|
||||
// reaches the point to make the current memtable immutable.
|
||||
bool is_nearly_full_;
|
||||
|
||||
// the default maximum depth of the cuckoo path.
|
||||
static const unsigned int kDefaultCuckooPathMaxDepth = 10;
|
||||
|
||||
CuckooStepBuffer step_buffer_;
|
||||
|
||||
// returns the bucket id associated with the input slice based on the
// hash function selected by hash_func_id.
|
||||
unsigned int GetHash(const Slice& slice, const int hash_func_id) const {
|
||||
// the seeds used in the Murmur hash to produce different hash functions.
|
||||
static const int kMurmurHashSeeds[HashCuckooRepFactory::kMaxHashCount] = {
|
||||
545609244, 1769731426, 763324157, 13099088, 592422103,
|
||||
1899789565, 248369300, 1984183468, 1613664382, 1491157517};
|
||||
return static_cast<unsigned int>(
|
||||
MurmurHash(slice.data(), static_cast<int>(slice.size()),
|
||||
kMurmurHashSeeds[hash_func_id]) %
|
||||
bucket_count_);
|
||||
}
|
||||
|
||||
// A cuckoo path is a sequence of bucket ids, where each id points to a
|
||||
// location of cuckoo_array_. This path describes the displacement sequence
|
||||
// of entries in order to store the desired data specified by the input user
|
||||
// key. The path starts from one of the locations associated with the
|
||||
// specified user key and ends at a vacant space in the cuckoo array. This
|
||||
// function will update the cuckoo_path.
|
||||
//
|
||||
// @return true if it found a cuckoo path.
|
||||
bool FindCuckooPath(const char* internal_key, const Slice& user_key,
|
||||
int* cuckoo_path, size_t* cuckoo_path_length,
|
||||
int initial_hash_id = 0);
|
||||
|
||||
// Perform quick insert by checking whether there is a vacant bucket in one
|
||||
// of the possible locations of the input key. If so, then the function will
|
||||
// return true and the key will be stored in that vacant bucket.
|
||||
//
|
||||
// This function is a helper function of FindCuckooPath that discovers the
|
||||
// first possible steps of a cuckoo path. It begins by first computing
|
||||
// the possible locations of the input keys (and stores them in bucket_ids.)
|
||||
// Then, if one of its possible locations is vacant, then the input key will
|
||||
// be stored in that vacant space and the function will return true.
|
||||
// Otherwise, the function will return false indicating a complete search
|
||||
// of cuckoo-path is needed.
|
||||
bool QuickInsert(const char* internal_key, const Slice& user_key,
|
||||
int bucket_ids[], const int initial_hash_id);
|
||||
|
||||
// Returns the pointer to the internal iterator to the buckets where buckets
|
||||
// are sorted according to the user specified KeyComparator. Note that
|
||||
// any insert after this function call may affect the sorted nature of
|
||||
// the returned iterator.
|
||||
virtual MemTableRep::Iterator* GetIterator(Arena* arena) override {
|
||||
std::vector<const char*> compact_buckets;
|
||||
for (unsigned int bid = 0; bid < bucket_count_; ++bid) {
|
||||
const char* bucket = cuckoo_array_[bid].load(std::memory_order_relaxed);
|
||||
if (bucket != nullptr) {
|
||||
compact_buckets.push_back(bucket);
|
||||
}
|
||||
}
|
||||
MemTableRep* backup_table = backup_table_.get();
|
||||
if (backup_table != nullptr) {
|
||||
std::unique_ptr<MemTableRep::Iterator> iter(backup_table->GetIterator());
|
||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
compact_buckets.push_back(iter->key());
|
||||
}
|
||||
}
|
||||
if (arena == nullptr) {
|
||||
return new Iterator(
|
||||
std::shared_ptr<std::vector<const char*>>(
|
||||
new std::vector<const char*>(std::move(compact_buckets))),
|
||||
compare_);
|
||||
} else {
|
||||
auto mem = arena->AllocateAligned(sizeof(Iterator));
|
||||
return new (mem) Iterator(
|
||||
std::shared_ptr<std::vector<const char*>>(
|
||||
new std::vector<const char*>(std::move(compact_buckets))),
|
||||
compare_);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void HashCuckooRep::Get(const LookupKey& key, void* callback_args,
|
||||
bool (*callback_func)(void* arg, const char* entry)) {
|
||||
Slice user_key = key.user_key();
|
||||
for (unsigned int hid = 0; hid < hash_function_count_; ++hid) {
|
||||
const char* bucket =
|
||||
cuckoo_array_[GetHash(user_key, hid)].load(std::memory_order_acquire);
|
||||
if (bucket != nullptr) {
|
||||
Slice bucket_user_key = UserKey(bucket);
|
||||
if (user_key == bucket_user_key) {
|
||||
callback_func(callback_args, bucket);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// as Put() always stores at the vacant bucket located by the
|
||||
// hash function with the smallest possible id, when we first
|
||||
// find a vacant bucket in Get(), that means a miss.
|
||||
break;
|
||||
}
|
||||
}
|
||||
MemTableRep* backup_table = backup_table_.get();
|
||||
if (backup_table != nullptr) {
|
||||
backup_table->Get(key, callback_args, callback_func);
|
||||
}
|
||||
}
|
||||
|
||||
// Inserts the entry referenced by `handle` into the memtable.
//
// FindCuckooPath() either stores the key directly (path length 0), returns
// a displacement path that is applied here, or fails. On failure the key is
// stored in a lazily created vector-based backup table and the memtable is
// flagged as nearly full.
void HashCuckooRep::Insert(KeyHandle handle) {
  static const float kMaxFullness = 0.90f;

  char* const entry = static_cast<char*>(handle);
  const int first_hash_id = 0;
  size_t path_length = 0;
  const auto user_key = UserKey(entry);

  const bool path_found = FindCuckooPath(entry, user_key, cuckoo_path_,
                                         &path_length, first_hash_id);
  if (!path_found) {
    // No vacant bucket is reachable even after trying every hash function.
    // Fall back to the backup memtable, creating it on first use; doing so
    // also marks this memtable as nearly full.
    if (!backup_table_) {
      VectorRepFactory factory(10);
      backup_table_.reset(
          factory.CreateMemTableRep(compare_, allocator_, nullptr, nullptr));
      is_nearly_full_ = true;
    }
    backup_table_->Insert(entry);
    return;
  }

  // From here on the insert is guaranteed to succeed.
  // NOTE(review): FindCuckooPath also returns true when QuickInsert
  // overwrote a bucket that already held the same user key, so this counter
  // may exceed the number of occupied buckets — confirm this is intended.
  ++occupied_count_;
  if (occupied_count_ >= bucket_count_ * kMaxFullness) {
    is_nearly_full_ = true;
  }

  // A zero-length path means the key has already been stored by the quick
  // insert; there is nothing to displace.
  if (path_length == 0) {
    return;
  }

  // cuckoo_path_ holds the displacement path in reverse order (vacant bucket
  // first). Walking it front to back copies every displaced key into its new
  // bucket before its old bucket is overwritten, so a concurrent reader
  // never observes a key as missing.
  for (size_t i = 1; i < path_length; ++i) {
    const int destination_bid = cuckoo_path_[i - 1];
    const int source_bid = cuckoo_path_[i];
    // Only one writer runs at a time, so a relaxed read is sufficient.
    char* const displaced =
        cuckoo_array_[source_bid].load(std::memory_order_relaxed);
    cuckoo_array_[destination_bid].store(displaced, std::memory_order_release);
  }

  // The last bucket on the path is now free for the new key.
  const int new_key_bid = cuckoo_path_[path_length - 1];
  cuckoo_array_[new_key_bid].store(entry, std::memory_order_release);
}
|
||||
|
||||
bool HashCuckooRep::Contains(const char* internal_key) const {
|
||||
auto user_key = UserKey(internal_key);
|
||||
for (unsigned int hid = 0; hid < hash_function_count_; ++hid) {
|
||||
const char* stored_key =
|
||||
cuckoo_array_[GetHash(user_key, hid)].load(std::memory_order_acquire);
|
||||
if (stored_key != nullptr) {
|
||||
if (compare_(internal_key, stored_key) == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool HashCuckooRep::QuickInsert(const char* internal_key, const Slice& user_key,
|
||||
int bucket_ids[], const int initial_hash_id) {
|
||||
int cuckoo_bucket_id = -1;
|
||||
|
||||
// Below does the followings:
|
||||
// 0. Calculate all possible locations of the input key.
|
||||
// 1. Check if there is a bucket having same user_key as the input does.
|
||||
// 2. If there exists such bucket, then replace this bucket by the newly
|
||||
// insert data and return. This step also performs duplication check.
|
||||
// 3. If no such bucket exists but exists a vacant bucket, then insert the
|
||||
// input data into it.
|
||||
// 4. If step 1 to 3 all fail, then return false.
|
||||
for (unsigned int hid = initial_hash_id; hid < hash_function_count_; ++hid) {
|
||||
bucket_ids[hid] = GetHash(user_key, hid);
|
||||
// since only one PUT is allowed at a time, and this is part of the PUT
|
||||
// operation, so we can safely perform relaxed load.
|
||||
const char* stored_key =
|
||||
cuckoo_array_[bucket_ids[hid]].load(std::memory_order_relaxed);
|
||||
if (stored_key == nullptr) {
|
||||
if (cuckoo_bucket_id == -1) {
|
||||
cuckoo_bucket_id = bucket_ids[hid];
|
||||
}
|
||||
} else {
|
||||
const auto bucket_user_key = UserKey(stored_key);
|
||||
if (bucket_user_key.compare(user_key) == 0) {
|
||||
cuckoo_bucket_id = bucket_ids[hid];
|
||||
assert(cuckoo_bucket_id != -1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cuckoo_bucket_id != -1) {
|
||||
cuckoo_array_[cuckoo_bucket_id].store(const_cast<char*>(internal_key),
|
||||
std::memory_order_release);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Perform pre-check and find the shortest cuckoo path. A cuckoo path
|
||||
// is a displacement sequence for inserting the specified input key.
|
||||
//
|
||||
// @return true if it successfully found a vacant space or cuckoo-path.
|
||||
// If the return value is true but the length of cuckoo_path is zero,
|
||||
// then it indicates that a vacant bucket or an bucket with matched user
|
||||
// key with the input is found, and a quick insertion is done.
|
||||
bool HashCuckooRep::FindCuckooPath(const char* internal_key,
|
||||
const Slice& user_key, int* cuckoo_path,
|
||||
size_t* cuckoo_path_length,
|
||||
const int initial_hash_id) {
|
||||
int bucket_ids[HashCuckooRepFactory::kMaxHashCount];
|
||||
*cuckoo_path_length = 0;
|
||||
|
||||
if (QuickInsert(internal_key, user_key, bucket_ids, initial_hash_id)) {
|
||||
return true;
|
||||
}
|
||||
// If this step is reached, then it means:
|
||||
// 1. no vacant bucket in any of the possible locations of the input key.
|
||||
// 2. none of the possible locations of the input key has the same user
|
||||
// key as the input `internal_key`.
|
||||
|
||||
// the front and back indices for the step_queue_
|
||||
step_buffer_.reset();
|
||||
|
||||
for (unsigned int hid = initial_hash_id; hid < hash_function_count_; ++hid) {
|
||||
/// CuckooStep& current_step = step_queue_[front_pos++];
|
||||
CuckooStep& current_step = step_buffer_.NextWriteBuffer();
|
||||
current_step.bucket_id_ = bucket_ids[hid];
|
||||
current_step.prev_step_id_ = CuckooStep::kNullStep;
|
||||
current_step.depth_ = 1;
|
||||
}
|
||||
|
||||
while (step_buffer_.HasNewWrite()) {
|
||||
int step_id = step_buffer_.read_index_;
|
||||
const CuckooStep& step = step_buffer_.ReadNext();
|
||||
// Since it's a BFS process, then the first step with its depth deeper
|
||||
// than the maximum allowed depth indicates all the remaining steps
|
||||
// in the step buffer queue will all exceed the maximum depth.
|
||||
// Return false immediately indicating we can't find a vacant bucket
|
||||
// for the input key before the maximum allowed depth.
|
||||
if (step.depth_ >= cuckoo_path_max_depth_) {
|
||||
return false;
|
||||
}
|
||||
// again, we can perform no barrier load safely here as the current
|
||||
// thread is the only writer.
|
||||
Slice bucket_user_key =
|
||||
UserKey(cuckoo_array_[step.bucket_id_].load(std::memory_order_relaxed));
|
||||
if (step.prev_step_id_ != CuckooStep::kNullStep) {
|
||||
if (bucket_user_key == user_key) {
|
||||
// then there is a loop in the current path, stop discovering this path.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// if the current bucket stores at its nth location, then we only consider
|
||||
// its mth location where m > n. This property makes sure that all reads
|
||||
// will not miss if we do have data associated to the query key.
|
||||
//
|
||||
// The n and m in the above statement is the start_hid and hid in the code.
|
||||
unsigned int start_hid = hash_function_count_;
|
||||
for (unsigned int hid = 0; hid < hash_function_count_; ++hid) {
|
||||
bucket_ids[hid] = GetHash(bucket_user_key, hid);
|
||||
if (step.bucket_id_ == bucket_ids[hid]) {
|
||||
start_hid = hid;
|
||||
}
|
||||
}
|
||||
// must found a bucket which is its current "home".
|
||||
assert(start_hid != hash_function_count_);
|
||||
|
||||
// explore all possible next steps from the current step.
|
||||
for (unsigned int hid = start_hid + 1; hid < hash_function_count_; ++hid) {
|
||||
CuckooStep& next_step = step_buffer_.NextWriteBuffer();
|
||||
next_step.bucket_id_ = bucket_ids[hid];
|
||||
next_step.prev_step_id_ = step_id;
|
||||
next_step.depth_ = step.depth_ + 1;
|
||||
// once a vacant bucket is found, trace back all its previous steps
|
||||
// to generate a cuckoo path.
|
||||
if (cuckoo_array_[next_step.bucket_id_].load(std::memory_order_relaxed) ==
|
||||
nullptr) {
|
||||
// store the last step in the cuckoo path. Note that cuckoo_path
|
||||
// stores steps in reverse order. This allows us to move keys along
|
||||
// the cuckoo path by storing each key to the new place first before
|
||||
// removing it from the old place. This property ensures reads will
|
||||
// not missed due to moving keys along the cuckoo path.
|
||||
cuckoo_path[(*cuckoo_path_length)++] = next_step.bucket_id_;
|
||||
int depth;
|
||||
for (depth = step.depth_; depth > 0 && step_id != CuckooStep::kNullStep;
|
||||
depth--) {
|
||||
const CuckooStep& prev_step = step_buffer_.steps_[step_id];
|
||||
cuckoo_path[(*cuckoo_path_length)++] = prev_step.bucket_id_;
|
||||
step_id = prev_step.prev_step_id_;
|
||||
}
|
||||
assert(depth == 0 && step_id == CuckooStep::kNullStep);
|
||||
return true;
|
||||
}
|
||||
if (step_buffer_.IsFull()) {
|
||||
// if true, then it reaches maxinum number of cuckoo search steps.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// tried all possible paths but still not unable to find a cuckoo path
|
||||
// which path leads to a vacant bucket.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Creates an iterator over the entry pointers in `bucket`, ordered by
// `compare`. The entries are not sorted here; sorting is deferred until the
// iterator is first used (see DoSort()). The iterator starts positioned
// past the end.
//
// `bucket` is taken by value and moved into the member, which avoids an
// extra reference-count increment/decrement on the shared vector.
HashCuckooRep::Iterator::Iterator(
    std::shared_ptr<std::vector<const char*>> bucket,
    const KeyComparator& compare)
    : bucket_(std::move(bucket)),
      cit_(bucket_->end()),
      compare_(compare),
      sorted_(false) {}
|
||||
|
||||
void HashCuckooRep::Iterator::DoSort() const {
|
||||
if (!sorted_) {
|
||||
std::sort(bucket_->begin(), bucket_->end(),
|
||||
stl_wrappers::Compare(compare_));
|
||||
cit_ = bucket_->begin();
|
||||
sorted_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true iff the iterator is positioned at a valid node.
|
||||
bool HashCuckooRep::Iterator::Valid() const {
|
||||
DoSort();
|
||||
return cit_ != bucket_->end();
|
||||
}
|
||||
|
||||
// Returns the key at the current position.
|
||||
// REQUIRES: Valid()
|
||||
const char* HashCuckooRep::Iterator::key() const {
|
||||
assert(Valid());
|
||||
return *cit_;
|
||||
}
|
||||
|
||||
// Advances to the next position.
|
||||
// REQUIRES: Valid()
|
||||
void HashCuckooRep::Iterator::Next() {
|
||||
assert(Valid());
|
||||
if (cit_ == bucket_->end()) {
|
||||
return;
|
||||
}
|
||||
++cit_;
|
||||
}
|
||||
|
||||
// Advances to the previous position.
|
||||
// REQUIRES: Valid()
|
||||
void HashCuckooRep::Iterator::Prev() {
|
||||
assert(Valid());
|
||||
if (cit_ == bucket_->begin()) {
|
||||
// If you try to go back from the first element, the iterator should be
|
||||
// invalidated. So we set it to past-the-end. This means that you can
|
||||
// treat the container circularly.
|
||||
cit_ = bucket_->end();
|
||||
} else {
|
||||
--cit_;
|
||||
}
|
||||
}
|
||||
|
||||
// Advance to the first entry with a key >= target
|
||||
void HashCuckooRep::Iterator::Seek(const Slice& user_key,
|
||||
const char* memtable_key) {
|
||||
DoSort();
|
||||
// Do binary search to find first value not less than the target
|
||||
const char* encoded_key =
|
||||
(memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, user_key);
|
||||
cit_ = std::equal_range(bucket_->begin(), bucket_->end(), encoded_key,
|
||||
[this](const char* a, const char* b) {
|
||||
return compare_(a, b) < 0;
|
||||
}).first;
|
||||
}
|
||||
|
||||
// Retreat to the last entry with a key <= target
|
||||
void HashCuckooRep::Iterator::SeekForPrev(const Slice& /*user_key*/,
|
||||
const char* /*memtable_key*/) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
// Position at the first entry in collection.
|
||||
// Final state of iterator is Valid() iff collection is not empty.
|
||||
void HashCuckooRep::Iterator::SeekToFirst() {
|
||||
DoSort();
|
||||
cit_ = bucket_->begin();
|
||||
}
|
||||
|
||||
// Position at the last entry in collection.
|
||||
// Final state of iterator is Valid() iff collection is not empty.
|
||||
void HashCuckooRep::Iterator::SeekToLast() {
|
||||
DoSort();
|
||||
cit_ = bucket_->end();
|
||||
if (bucket_->size() != 0) {
|
||||
--cit_;
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
// Creates a HashCuckooRep sized from the factory's settings.
//
// The bucket count is derived from write_buffer_size_, the per-entry
// footprint (average_data_size_ plus one atomic pointer) and the target
// fullness. The number of hash functions is clamped to
// [2, kMaxHashCount]. `transform` and `logger` are ignored.
MemTableRep* HashCuckooRepFactory::CreateMemTableRep(
    const MemTableRep::KeyComparator& compare, Allocator* allocator,
    const SliceTransform* /*transform*/, Logger* /*logger*/) {
  // Estimated average fullness. Write performance of a closed hash table
  // degrades as it fills up; a value around 0.7 avoids most of that
  // degradation while still using memory efficiently.
  static const float kFullness = 0.7f;

  const size_t pointer_size = sizeof(std::atomic<const char*>);
  const size_t entry_footprint = average_data_size_ + pointer_size;
  assert(write_buffer_size_ >= entry_footprint);

  const size_t bucket_count = static_cast<size_t>(
      (write_buffer_size_ / entry_footprint) / kFullness + 1);

  // Clamp the requested number of hash functions to the supported range.
  unsigned int hash_function_count = hash_function_count_;
  if (hash_function_count < 2) {
    hash_function_count = 2;
  }
  if (hash_function_count > kMaxHashCount) {
    hash_function_count = kMaxHashCount;
  }

  const size_t approximate_entry_size =
      static_cast<size_t>(entry_footprint / kFullness);

  return new HashCuckooRep(compare, allocator, bucket_count,
                           hash_function_count, approximate_entry_size);
}
|
||||
|
||||
// Allocates a HashCuckooRepFactory configured with the given write buffer
// size, average data size and number of hash functions. The caller receives
// the raw pointer returned by `new`.
MemTableRepFactory* NewHashCuckooRepFactory(size_t write_buffer_size,
                                            size_t average_data_size,
                                            unsigned int hash_function_count) {
  MemTableRepFactory* const factory = new HashCuckooRepFactory(
      write_buffer_size, average_data_size, hash_function_count);
  return factory;
}
|
||||
|
||||
} // namespace rocksdb
|
||||
#endif // ROCKSDB_LITE
|
@ -1,44 +0,0 @@
|
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#pragma once
|
||||
#ifndef ROCKSDB_LITE
|
||||
#include "port/port.h"
|
||||
#include "rocksdb/slice_transform.h"
|
||||
#include "rocksdb/memtablerep.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
class HashCuckooRepFactory : public MemTableRepFactory {
|
||||
public:
|
||||
// maxinum number of hash functions used in the cuckoo hash.
|
||||
static const unsigned int kMaxHashCount = 10;
|
||||
|
||||
explicit HashCuckooRepFactory(size_t write_buffer_size,
|
||||
size_t average_data_size,
|
||||
unsigned int hash_function_count)
|
||||
: write_buffer_size_(write_buffer_size),
|
||||
average_data_size_(average_data_size),
|
||||
hash_function_count_(hash_function_count) {}
|
||||
|
||||
virtual ~HashCuckooRepFactory() {}
|
||||
|
||||
using MemTableRepFactory::CreateMemTableRep;
|
||||
virtual MemTableRep* CreateMemTableRep(
|
||||
const MemTableRep::KeyComparator& compare, Allocator* allocator,
|
||||
const SliceTransform* transform, Logger* logger) override;
|
||||
|
||||
virtual const char* Name() const override { return "HashCuckooRepFactory"; }
|
||||
|
||||
private:
|
||||
size_t write_buffer_size_;
|
||||
size_t average_data_size_;
|
||||
const unsigned int hash_function_count_;
|
||||
};
|
||||
} // namespace rocksdb
|
||||
#endif // ROCKSDB_LITE
|
@ -95,17 +95,8 @@ DEFINE_int32(
|
||||
threshold_use_skiplist, 256,
|
||||
"threshold_use_skiplist parameter to pass into NewHashLinkListRepFactory");
|
||||
|
||||
DEFINE_int64(
|
||||
write_buffer_size, 256,
|
||||
"write_buffer_size parameter to pass into NewHashCuckooRepFactory");
|
||||
|
||||
DEFINE_int64(
|
||||
average_data_size, 64,
|
||||
"average_data_size parameter to pass into NewHashCuckooRepFactory");
|
||||
|
||||
DEFINE_int64(
|
||||
hash_function_count, 4,
|
||||
"hash_function_count parameter to pass into NewHashCuckooRepFactory");
|
||||
DEFINE_int64(write_buffer_size, 256,
|
||||
"write_buffer_size parameter to pass into WriteBufferManager");
|
||||
|
||||
DEFINE_int32(
|
||||
num_threads, 1,
|
||||
@ -607,12 +598,6 @@ int main(int argc, char** argv) {
|
||||
FLAGS_if_log_bucket_dist_when_flash, FLAGS_threshold_use_skiplist));
|
||||
options.prefix_extractor.reset(
|
||||
rocksdb::NewFixedPrefixTransform(FLAGS_prefix_length));
|
||||
} else if (FLAGS_memtablerep == "cuckoo") {
|
||||
factory.reset(rocksdb::NewHashCuckooRepFactory(
|
||||
FLAGS_write_buffer_size, FLAGS_average_data_size,
|
||||
static_cast<uint32_t>(FLAGS_hash_function_count)));
|
||||
options.prefix_extractor.reset(
|
||||
rocksdb::NewFixedPrefixTransform(FLAGS_prefix_length));
|
||||
#endif // ROCKSDB_LITE
|
||||
} else {
|
||||
fprintf(stdout, "Unknown memtablerep: %s\n", FLAGS_memtablerep.c_str());
|
||||
|
@ -703,8 +703,8 @@ TEST_F(OptionsTest, GetMemTableRepFactoryFromString) {
|
||||
&new_mem_factory));
|
||||
|
||||
ASSERT_NOK(GetMemTableRepFactoryFromString("cuckoo", &new_mem_factory));
|
||||
ASSERT_OK(GetMemTableRepFactoryFromString("cuckoo:1024", &new_mem_factory));
|
||||
ASSERT_EQ(std::string(new_mem_factory->Name()), "HashCuckooRepFactory");
|
||||
// CuckooHash memtable is already removed.
|
||||
ASSERT_NOK(GetMemTableRepFactoryFromString("cuckoo:1024", &new_mem_factory));
|
||||
|
||||
ASSERT_NOK(GetMemTableRepFactoryFromString("bad_factory", &new_mem_factory));
|
||||
}
|
||||
|
1
src.mk
1
src.mk
@ -66,7 +66,6 @@ LIB_SOURCES = \
|
||||
env/io_posix.cc \
|
||||
env/mock_env.cc \
|
||||
memtable/alloc_tracker.cc \
|
||||
memtable/hash_cuckoo_rep.cc \
|
||||
memtable/hash_linklist_rep.cc \
|
||||
memtable/hash_skiplist_rep.cc \
|
||||
memtable/skiplistrep.cc \
|
||||
|
@ -133,15 +133,8 @@ Status GetMemTableRepFactoryFromString(
|
||||
mem_factory = new VectorRepFactory();
|
||||
}
|
||||
} else if (opts_list[0] == "cuckoo") {
|
||||
// Expecting format
|
||||
// cuckoo:<write_buffer_size>
|
||||
if (2 == len) {
|
||||
size_t write_buffer_size = ParseSizeT(opts_list[1]);
|
||||
mem_factory = NewHashCuckooRepFactory(write_buffer_size);
|
||||
} else if (1 == len) {
|
||||
return Status::InvalidArgument("Can't parse memtable_factory option ",
|
||||
opts_str);
|
||||
}
|
||||
return Status::NotSupported(
|
||||
"cuckoo hash memtable is not supported anymore.");
|
||||
} else {
|
||||
return Status::InvalidArgument("Unrecognized memtable_factory option ",
|
||||
opts_str);
|
||||
|
@ -1099,7 +1099,6 @@ enum RepFactory {
|
||||
kPrefixHash,
|
||||
kVectorRep,
|
||||
kHashLinkedList,
|
||||
kCuckoo
|
||||
};
|
||||
|
||||
static enum RepFactory StringToRepFactory(const char* ctype) {
|
||||
@ -1113,8 +1112,6 @@ static enum RepFactory StringToRepFactory(const char* ctype) {
|
||||
return kVectorRep;
|
||||
else if (!strcasecmp(ctype, "hash_linkedlist"))
|
||||
return kHashLinkedList;
|
||||
else if (!strcasecmp(ctype, "cuckoo"))
|
||||
return kCuckoo;
|
||||
|
||||
fprintf(stdout, "Cannot parse memreptable %s\n", ctype);
|
||||
return kSkipList;
|
||||
@ -2186,9 +2183,6 @@ class Benchmark {
|
||||
case kHashLinkedList:
|
||||
fprintf(stdout, "Memtablerep: hash_linkedlist\n");
|
||||
break;
|
||||
case kCuckoo:
|
||||
fprintf(stdout, "Memtablerep: cuckoo\n");
|
||||
break;
|
||||
}
|
||||
fprintf(stdout, "Perf Level: %d\n", FLAGS_perf_level);
|
||||
|
||||
@ -3292,10 +3286,6 @@ void VerifyDBFromDB(std::string& truth_db_name) {
|
||||
new VectorRepFactory
|
||||
);
|
||||
break;
|
||||
case kCuckoo:
|
||||
options.memtable_factory.reset(NewHashCuckooRepFactory(
|
||||
options.write_buffer_size, FLAGS_key_size + FLAGS_value_size));
|
||||
break;
|
||||
#else
|
||||
default:
|
||||
fprintf(stderr, "Only skip list is supported in lite mode\n");
|
||||
|
Loading…
Reference in New Issue
Block a user