Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
5d25a46936
@ -408,9 +408,15 @@ TEST(ColumnFamilyTest, WriteBatchFailure) {
|
||||
Open();
|
||||
CreateColumnFamiliesAndReopen({"one", "two"});
|
||||
WriteBatch batch;
|
||||
batch.Put(handles_[0], Slice("existing"), Slice("column-family"));
|
||||
batch.Put(handles_[1], Slice("non-existing"), Slice("column-family"));
|
||||
ASSERT_OK(db_->Write(WriteOptions(), &batch));
|
||||
DropColumnFamilies({1});
|
||||
WriteOptions woptions_ignore_missing_cf;
|
||||
woptions_ignore_missing_cf.ignore_missing_column_families = true;
|
||||
batch.Put(handles_[0], Slice("still here"), Slice("column-family"));
|
||||
ASSERT_OK(db_->Write(woptions_ignore_missing_cf, &batch));
|
||||
ASSERT_EQ("column-family", Get(0, "still here"));
|
||||
Status s = db_->Write(WriteOptions(), &batch);
|
||||
ASSERT_TRUE(s.IsInvalidArgument());
|
||||
Close();
|
||||
|
142
db/db_impl.cc
142
db/db_impl.cc
@ -290,8 +290,10 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
|
||||
return result;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
Status SanitizeDBOptionsByCFOptions(
|
||||
DBOptions* db_opts,
|
||||
const DBOptions* db_opts,
|
||||
const std::vector<ColumnFamilyDescriptor>& column_families) {
|
||||
Status s;
|
||||
for (auto cf : column_families) {
|
||||
@ -303,7 +305,6 @@ Status SanitizeDBOptionsByCFOptions(
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
namespace {
|
||||
CompressionType GetCompressionFlush(const Options& options) {
|
||||
// Compressing memtable flushes might not help unless the sequential load
|
||||
// optimization is used for leveled compaction. Otherwise the CPU and
|
||||
@ -631,7 +632,7 @@ bool CompareCandidateFile(const rocksdb::DBImpl::CandidateFileInfo& first,
|
||||
} else if (first.file_name < second.file_name) {
|
||||
return false;
|
||||
} else {
|
||||
return (first.path_id > first.path_id);
|
||||
return (first.path_id > second.path_id);
|
||||
}
|
||||
}
|
||||
}; // namespace
|
||||
@ -1301,14 +1302,20 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
|
||||
WriteBatch batch;
|
||||
while (reader.ReadRecord(&record, &scratch)) {
|
||||
if (record.size() < 12) {
|
||||
reporter.Corruption(
|
||||
record.size(), Status::Corruption("log record too small"));
|
||||
reporter.Corruption(record.size(),
|
||||
Status::Corruption("log record too small"));
|
||||
continue;
|
||||
}
|
||||
WriteBatchInternal::SetContents(&batch, record);
|
||||
|
||||
// If column family was not found, it might mean that the WAL write
|
||||
// batch references to the column family that was dropped after the
|
||||
// insert. We don't want to fail the whole write batch in that case -- we
|
||||
// just ignore the update. That's why we set ignore missing column families
|
||||
// to true
|
||||
status = WriteBatchInternal::InsertInto(
|
||||
&batch, column_family_memtables_.get(), true, log_number);
|
||||
&batch, column_family_memtables_.get(),
|
||||
true /* ignore missing column families */, log_number);
|
||||
|
||||
MaybeIgnoreError(&status);
|
||||
if (!status.ok()) {
|
||||
@ -1677,6 +1684,13 @@ Status DBImpl::CompactRange(ColumnFamilyHandle* column_family,
|
||||
}
|
||||
LogFlush(options_.info_log);
|
||||
|
||||
{
|
||||
MutexLock l(&mutex_);
|
||||
// an automatic compaction that has been scheduled might have been
|
||||
// preempted by the manual compactions. Need to schedule it back.
|
||||
MaybeScheduleFlushOrCompaction();
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -1864,18 +1878,15 @@ Status DBImpl::RunManualCompaction(ColumnFamilyData* cfd, int input_level,
|
||||
bg_cv_.Wait();
|
||||
} else {
|
||||
manual_compaction_ = &manual;
|
||||
MaybeScheduleFlushOrCompaction();
|
||||
assert(bg_compaction_scheduled_ == 0);
|
||||
bg_compaction_scheduled_++;
|
||||
env_->Schedule(&DBImpl::BGWorkCompaction, this, Env::Priority::LOW);
|
||||
}
|
||||
}
|
||||
|
||||
assert(!manual.in_progress);
|
||||
assert(bg_manual_only_ > 0);
|
||||
--bg_manual_only_;
|
||||
if (bg_manual_only_ == 0) {
|
||||
// an automatic compaction should have been scheduled might have be
|
||||
// preempted by the manual compactions. Need to schedule it back.
|
||||
MaybeScheduleFlushOrCompaction();
|
||||
}
|
||||
return manual.status;
|
||||
}
|
||||
|
||||
@ -1963,11 +1974,11 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
|
||||
|
||||
// Schedule BGWorkCompaction if there's a compaction pending (or a memtable
|
||||
// flush, but the HIGH pool is not enabled)
|
||||
// Do it only if max_background_compactions hasn't been reached and, in case
|
||||
// bg_manual_only_ > 0, if it's a manual compaction.
|
||||
if ((manual_compaction_ || is_compaction_needed ||
|
||||
(is_flush_pending && options_.max_background_flushes == 0)) &&
|
||||
(!bg_manual_only_ || manual_compaction_)) {
|
||||
// Do it only if max_background_compactions hasn't been reached and
|
||||
// bg_manual_only_ == 0
|
||||
if (!bg_manual_only_ &&
|
||||
(is_compaction_needed ||
|
||||
(is_flush_pending && options_.max_background_flushes == 0))) {
|
||||
if (bg_compaction_scheduled_ < options_.max_background_compactions) {
|
||||
bg_compaction_scheduled_++;
|
||||
env_->Schedule(&DBImpl::BGWorkCompaction, this, Env::Priority::LOW);
|
||||
@ -1979,7 +1990,7 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
|
||||
}
|
||||
|
||||
void DBImpl::RecordFlushIOStats() {
|
||||
RecordTick(stats_, FLUSH_WRITE_BYTES, iostats_context.bytes_written);
|
||||
RecordTick(stats_, FLUSH_WRITE_BYTES, IOSTATS(bytes_written));
|
||||
IOSTATS_RESET(bytes_written);
|
||||
}
|
||||
|
||||
@ -2194,6 +2205,10 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
|
||||
if (is_manual) {
|
||||
// another thread cannot pick up the same work
|
||||
manual_compaction_->in_progress = true;
|
||||
} else if (manual_compaction_ != nullptr) {
|
||||
// there should be no automatic compactions running when manual compaction
|
||||
// is running
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// FLUSH preempts compaction
|
||||
@ -2313,7 +2328,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
|
||||
|
||||
if (status.ok()) {
|
||||
// Done
|
||||
} else if (shutting_down_.Acquire_Load()) {
|
||||
} else if (status.IsShutdownInProgress()) {
|
||||
// Ignore compaction errors found during shutting down
|
||||
} else {
|
||||
Log(InfoLogLevel::WARN_LEVEL, options_.info_log, "Compaction error: %s",
|
||||
@ -2573,6 +2588,10 @@ inline SequenceNumber DBImpl::findEarliestVisibleSnapshot(
|
||||
uint64_t DBImpl::CallFlushDuringCompaction(ColumnFamilyData* cfd,
|
||||
DeletionState& deletion_state,
|
||||
LogBuffer* log_buffer) {
|
||||
if (options_.max_background_flushes > 0) {
|
||||
// flush thread will take care of this
|
||||
return 0;
|
||||
}
|
||||
if (cfd->imm()->imm_flush_needed.NoBarrier_Load() != nullptr) {
|
||||
const uint64_t imm_start = env_->NowMicros();
|
||||
mutex_.Lock();
|
||||
@ -2626,9 +2645,29 @@ Status DBImpl::ProcessKeyValueCompaction(
|
||||
compaction_filter = compaction_filter_from_factory.get();
|
||||
}
|
||||
|
||||
int64_t key_drop_user = 0;
|
||||
int64_t key_drop_newer_entry = 0;
|
||||
int64_t key_drop_obsolete = 0;
|
||||
int64_t loop_cnt = 0;
|
||||
while (input->Valid() && !shutting_down_.Acquire_Load() &&
|
||||
!cfd->IsDropped()) {
|
||||
RecordCompactionIOStats();
|
||||
if (++loop_cnt > 1000) {
|
||||
if (key_drop_user > 0) {
|
||||
RecordTick(stats_, COMPACTION_KEY_DROP_USER, key_drop_user);
|
||||
key_drop_user = 0;
|
||||
}
|
||||
if (key_drop_newer_entry > 0) {
|
||||
RecordTick(stats_, COMPACTION_KEY_DROP_NEWER_ENTRY,
|
||||
key_drop_newer_entry);
|
||||
key_drop_newer_entry = 0;
|
||||
}
|
||||
if (key_drop_obsolete > 0) {
|
||||
RecordTick(stats_, COMPACTION_KEY_DROP_OBSOLETE, key_drop_obsolete);
|
||||
key_drop_obsolete = 0;
|
||||
}
|
||||
RecordCompactionIOStats();
|
||||
loop_cnt = 0;
|
||||
}
|
||||
// FLUSH preempts compaction
|
||||
// TODO(icanadi) this currently only checks if flush is necessary on
|
||||
// compacting column family. we should also check if flush is necessary on
|
||||
@ -2709,7 +2748,7 @@ Status DBImpl::ProcessKeyValueCompaction(
|
||||
ParseInternalKey(key, &ikey);
|
||||
// no value associated with delete
|
||||
value.clear();
|
||||
RecordTick(stats_, COMPACTION_KEY_DROP_USER);
|
||||
++key_drop_user;
|
||||
} else if (value_changed) {
|
||||
value = compaction_filter_value;
|
||||
}
|
||||
@ -2733,7 +2772,7 @@ Status DBImpl::ProcessKeyValueCompaction(
|
||||
// TODO: why not > ?
|
||||
assert(last_sequence_for_key >= ikey.sequence);
|
||||
drop = true; // (A)
|
||||
RecordTick(stats_, COMPACTION_KEY_DROP_NEWER_ENTRY);
|
||||
++key_drop_newer_entry;
|
||||
} else if (ikey.type == kTypeDeletion &&
|
||||
ikey.sequence <= earliest_snapshot &&
|
||||
compact->compaction->KeyNotExistsBeyondOutputLevel(ikey.user_key)) {
|
||||
@ -2745,7 +2784,7 @@ Status DBImpl::ProcessKeyValueCompaction(
|
||||
// few iterations of this loop (by rule (A) above).
|
||||
// Therefore this deletion marker is obsolete and can be dropped.
|
||||
drop = true;
|
||||
RecordTick(stats_, COMPACTION_KEY_DROP_OBSOLETE);
|
||||
++key_drop_obsolete;
|
||||
} else if (ikey.type == kTypeMerge) {
|
||||
if (!merge.HasOperator()) {
|
||||
LogToBuffer(log_buffer, "Options::merge_operator is null.");
|
||||
@ -2892,7 +2931,15 @@ Status DBImpl::ProcessKeyValueCompaction(
|
||||
input->Next();
|
||||
}
|
||||
}
|
||||
|
||||
if (key_drop_user > 0) {
|
||||
RecordTick(stats_, COMPACTION_KEY_DROP_USER, key_drop_user);
|
||||
}
|
||||
if (key_drop_newer_entry > 0) {
|
||||
RecordTick(stats_, COMPACTION_KEY_DROP_NEWER_ENTRY, key_drop_newer_entry);
|
||||
}
|
||||
if (key_drop_obsolete > 0) {
|
||||
RecordTick(stats_, COMPACTION_KEY_DROP_OBSOLETE, key_drop_obsolete);
|
||||
}
|
||||
RecordCompactionIOStats();
|
||||
|
||||
return status;
|
||||
@ -3367,7 +3414,7 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
||||
ColumnFamilyHandle* column_family, const Slice& key,
|
||||
std::string* value, bool* value_found) {
|
||||
StopWatch sw(env_, stats_, DB_GET);
|
||||
PERF_TIMER_AUTO(get_snapshot_time);
|
||||
PERF_TIMER_GUARD(get_snapshot_time);
|
||||
|
||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
||||
auto cfd = cfh->cfd();
|
||||
@ -3391,6 +3438,7 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
||||
// merge_operands will contain the sequence of merges in the latter case.
|
||||
LookupKey lkey(key, snapshot);
|
||||
PERF_TIMER_STOP(get_snapshot_time);
|
||||
|
||||
if (sv->mem->Get(lkey, value, &s, merge_context, *cfd->options())) {
|
||||
// Done
|
||||
RecordTick(stats_, MEMTABLE_HIT);
|
||||
@ -3398,20 +3446,19 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
||||
// Done
|
||||
RecordTick(stats_, MEMTABLE_HIT);
|
||||
} else {
|
||||
PERF_TIMER_START(get_from_output_files_time);
|
||||
|
||||
PERF_TIMER_GUARD(get_from_output_files_time);
|
||||
sv->current->Get(options, lkey, value, &s, &merge_context, value_found);
|
||||
PERF_TIMER_STOP(get_from_output_files_time);
|
||||
RecordTick(stats_, MEMTABLE_MISS);
|
||||
}
|
||||
|
||||
PERF_TIMER_START(get_post_process_time);
|
||||
{
|
||||
PERF_TIMER_GUARD(get_post_process_time);
|
||||
|
||||
ReturnAndCleanupSuperVersion(cfd, sv);
|
||||
ReturnAndCleanupSuperVersion(cfd, sv);
|
||||
|
||||
RecordTick(stats_, NUMBER_KEYS_READ);
|
||||
RecordTick(stats_, BYTES_READ, value->size());
|
||||
PERF_TIMER_STOP(get_post_process_time);
|
||||
RecordTick(stats_, NUMBER_KEYS_READ);
|
||||
RecordTick(stats_, BYTES_READ, value->size());
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -3421,7 +3468,7 @@ std::vector<Status> DBImpl::MultiGet(
|
||||
const std::vector<Slice>& keys, std::vector<std::string>* values) {
|
||||
|
||||
StopWatch sw(env_, stats_, DB_MULTIGET);
|
||||
PERF_TIMER_AUTO(get_snapshot_time);
|
||||
PERF_TIMER_GUARD(get_snapshot_time);
|
||||
|
||||
SequenceNumber snapshot;
|
||||
|
||||
@ -3497,7 +3544,7 @@ std::vector<Status> DBImpl::MultiGet(
|
||||
}
|
||||
|
||||
// Post processing (decrement reference counts and record statistics)
|
||||
PERF_TIMER_START(get_post_process_time);
|
||||
PERF_TIMER_GUARD(get_post_process_time);
|
||||
autovector<SuperVersion*> superversions_to_delete;
|
||||
|
||||
// TODO(icanadi) do we need lock here or just around Cleanup()?
|
||||
@ -3870,7 +3917,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
if (my_batch == nullptr) {
|
||||
return Status::Corruption("Batch is nullptr!");
|
||||
}
|
||||
PERF_TIMER_AUTO(write_pre_and_post_process_time);
|
||||
PERF_TIMER_GUARD(write_pre_and_post_process_time);
|
||||
Writer w(&mutex_);
|
||||
w.batch = my_batch;
|
||||
w.sync = options.sync;
|
||||
@ -4003,7 +4050,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
|
||||
uint64_t log_size = 0;
|
||||
if (!options.disableWAL) {
|
||||
PERF_TIMER_START(write_wal_time);
|
||||
PERF_TIMER_GUARD(write_wal_time);
|
||||
Slice log_entry = WriteBatchInternal::Contents(updates);
|
||||
status = log_->AddRecord(log_entry);
|
||||
total_log_size_ += log_entry.size();
|
||||
@ -4021,13 +4068,13 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
status = log_->file()->Sync();
|
||||
}
|
||||
}
|
||||
PERF_TIMER_STOP(write_wal_time);
|
||||
}
|
||||
if (status.ok()) {
|
||||
PERF_TIMER_START(write_memtable_time);
|
||||
PERF_TIMER_GUARD(write_memtable_time);
|
||||
|
||||
status = WriteBatchInternal::InsertInto(
|
||||
updates, column_family_memtables_.get(), false, 0, this, false);
|
||||
updates, column_family_memtables_.get(),
|
||||
options.ignore_missing_column_families, 0, this, false);
|
||||
// A non-OK status here indicates iteration failure (either in-memory
|
||||
// writebatch corruption (very bad), or the client specified invalid
|
||||
// column family). This will later on trigger bg_error_.
|
||||
@ -4036,8 +4083,6 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
// into the memtable would result in a state that some write ops might
|
||||
// have succeeded in memtable but Status reports error for all writes.
|
||||
|
||||
PERF_TIMER_STOP(write_memtable_time);
|
||||
|
||||
SetTickerCount(stats_, SEQUENCE_NUMBER, last_sequence);
|
||||
}
|
||||
PERF_TIMER_START(write_pre_and_post_process_time);
|
||||
@ -4071,7 +4116,6 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
RecordTick(stats_, WRITE_TIMEDOUT);
|
||||
}
|
||||
|
||||
PERF_TIMER_STOP(write_pre_and_post_process_time);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -4759,11 +4803,7 @@ Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) {
|
||||
column_families.push_back(
|
||||
ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
|
||||
std::vector<ColumnFamilyHandle*> handles;
|
||||
Status s = SanitizeDBOptionsByCFOptions(&db_options, column_families);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
s = DB::Open(db_options, dbname, column_families, &handles, dbptr);
|
||||
Status s = DB::Open(db_options, dbname, column_families, &handles, dbptr);
|
||||
if (s.ok()) {
|
||||
assert(handles.size() == 1);
|
||||
// i can delete the handle since DBImpl is always holding a reference to
|
||||
@ -4776,6 +4816,10 @@ Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) {
|
||||
Status DB::Open(const DBOptions& db_options, const std::string& dbname,
|
||||
const std::vector<ColumnFamilyDescriptor>& column_families,
|
||||
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr) {
|
||||
Status s = SanitizeDBOptionsByCFOptions(&db_options, column_families);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
if (db_options.db_paths.size() > 1) {
|
||||
for (auto& cfd : column_families) {
|
||||
if (cfd.options.compaction_style != kCompactionStyleUniversal) {
|
||||
@ -4801,7 +4845,7 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
|
||||
}
|
||||
|
||||
DBImpl* impl = new DBImpl(db_options, dbname);
|
||||
Status s = impl->env_->CreateDirIfMissing(impl->options_.wal_dir);
|
||||
s = impl->env_->CreateDirIfMissing(impl->options_.wal_dir);
|
||||
if (s.ok()) {
|
||||
for (auto db_path : impl->options_.db_paths) {
|
||||
s = impl->env_->CreateDirIfMissing(db_path.path);
|
||||
|
@ -74,6 +74,8 @@ class DBImplReadOnly : public DBImpl {
|
||||
uint32_t target_path_id = 0) override {
|
||||
return Status::NotSupported("Not supported operation in read only mode.");
|
||||
}
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
virtual Status DisableFileDeletions() override {
|
||||
return Status::NotSupported("Not supported operation in read only mode.");
|
||||
}
|
||||
@ -85,6 +87,8 @@ class DBImplReadOnly : public DBImpl {
|
||||
bool flush_memtable = true) override {
|
||||
return Status::NotSupported("Not supported operation in read only mode.");
|
||||
}
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
using DBImpl::Flush;
|
||||
virtual Status Flush(const FlushOptions& options,
|
||||
ColumnFamilyHandle* column_family) override {
|
||||
|
@ -194,9 +194,8 @@ void DBIter::Next() {
|
||||
// NOTE: In between, saved_key_ can point to a user key that has
|
||||
// a delete marker
|
||||
inline void DBIter::FindNextUserEntry(bool skipping) {
|
||||
PERF_TIMER_AUTO(find_next_user_entry_time);
|
||||
PERF_TIMER_GUARD(find_next_user_entry_time);
|
||||
FindNextUserEntryInternal(skipping);
|
||||
PERF_TIMER_STOP(find_next_user_entry_time);
|
||||
}
|
||||
|
||||
// Actual implementation of DBIter::FindNextUserEntry()
|
||||
@ -557,9 +556,12 @@ void DBIter::Seek(const Slice& target) {
|
||||
saved_key_.Clear();
|
||||
// now savved_key is used to store internal key.
|
||||
saved_key_.SetInternalKey(target, sequence_);
|
||||
PERF_TIMER_AUTO(seek_internal_seek_time);
|
||||
iter_->Seek(saved_key_.GetKey());
|
||||
PERF_TIMER_STOP(seek_internal_seek_time);
|
||||
|
||||
{
|
||||
PERF_TIMER_GUARD(seek_internal_seek_time);
|
||||
iter_->Seek(saved_key_.GetKey());
|
||||
}
|
||||
|
||||
if (iter_->Valid()) {
|
||||
direction_ = kForward;
|
||||
ClearSavedValue();
|
||||
@ -577,9 +579,12 @@ void DBIter::SeekToFirst() {
|
||||
}
|
||||
direction_ = kForward;
|
||||
ClearSavedValue();
|
||||
PERF_TIMER_AUTO(seek_internal_seek_time);
|
||||
iter_->SeekToFirst();
|
||||
PERF_TIMER_STOP(seek_internal_seek_time);
|
||||
|
||||
{
|
||||
PERF_TIMER_GUARD(seek_internal_seek_time);
|
||||
iter_->SeekToFirst();
|
||||
}
|
||||
|
||||
if (iter_->Valid()) {
|
||||
FindNextUserEntry(false /* not skipping */);
|
||||
} else {
|
||||
@ -595,9 +600,11 @@ void DBIter::SeekToLast() {
|
||||
}
|
||||
direction_ = kReverse;
|
||||
ClearSavedValue();
|
||||
PERF_TIMER_AUTO(seek_internal_seek_time);
|
||||
iter_->SeekToLast();
|
||||
PERF_TIMER_STOP(seek_internal_seek_time);
|
||||
|
||||
{
|
||||
PERF_TIMER_GUARD(seek_internal_seek_time);
|
||||
iter_->SeekToLast();
|
||||
}
|
||||
|
||||
PrevInternal();
|
||||
}
|
||||
|
@ -7554,7 +7554,7 @@ TEST(DBTest, SimpleWriteTimeoutTest) {
|
||||
ASSERT_OK(Put(Key(2), Key(2) + std::string(100000, 'v'), write_opt));
|
||||
// As the only two write buffers are full in this moment, the third
|
||||
// Put is expected to be timed-out.
|
||||
write_opt.timeout_hint_us = 300;
|
||||
write_opt.timeout_hint_us = 50;
|
||||
ASSERT_TRUE(
|
||||
Put(Key(3), Key(3) + std::string(100000, 'v'), write_opt).IsTimedOut());
|
||||
}
|
||||
|
@ -127,26 +127,6 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
|
||||
}
|
||||
}
|
||||
|
||||
const char* InternalFilterPolicy::Name() const {
|
||||
return user_policy_->Name();
|
||||
}
|
||||
|
||||
void InternalFilterPolicy::CreateFilter(const Slice* keys, int n,
|
||||
std::string* dst) const {
|
||||
// We rely on the fact that the code in table.cc does not mind us
|
||||
// adjusting keys[].
|
||||
Slice* mkey = const_cast<Slice*>(keys);
|
||||
for (int i = 0; i < n; i++) {
|
||||
mkey[i] = ExtractUserKey(keys[i]);
|
||||
// TODO(sanjay): Suppress dups?
|
||||
}
|
||||
user_policy_->CreateFilter(keys, n, dst);
|
||||
}
|
||||
|
||||
bool InternalFilterPolicy::KeyMayMatch(const Slice& key, const Slice& f) const {
|
||||
return user_policy_->KeyMayMatch(ExtractUserKey(key), f);
|
||||
}
|
||||
|
||||
LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
|
||||
size_t usize = user_key.size();
|
||||
size_t needed = usize + 13; // A conservative estimate
|
||||
|
@ -124,19 +124,6 @@ class InternalKeyComparator : public Comparator {
|
||||
int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const;
|
||||
};
|
||||
|
||||
// Filter policy wrapper that converts from internal keys to user keys
|
||||
class InternalFilterPolicy : public FilterPolicy {
|
||||
private:
|
||||
std::shared_ptr<const FilterPolicy> shared_ptr_;
|
||||
const FilterPolicy* const user_policy_;
|
||||
public:
|
||||
explicit InternalFilterPolicy(std::shared_ptr<const FilterPolicy> p)
|
||||
: shared_ptr_(p), user_policy_(p.get()) {}
|
||||
virtual const char* Name() const;
|
||||
virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
|
||||
};
|
||||
|
||||
// Modules in this directory should keep internal keys wrapped inside
|
||||
// the following class instead of plain strings so that we do not
|
||||
// incorrectly use string comparisons instead of an InternalKeyComparator.
|
||||
|
@ -6,9 +6,10 @@
|
||||
#ifndef ROCKSDB_LITE
|
||||
#include "db/forward_iterator.h"
|
||||
|
||||
#include <limits>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <limits>
|
||||
|
||||
#include "db/db_impl.h"
|
||||
#include "db/db_iter.h"
|
||||
#include "db/column_family.h"
|
||||
@ -37,12 +38,16 @@ class LevelIterator : public Iterator {
|
||||
assert(file_index < files_.size());
|
||||
if (file_index != file_index_) {
|
||||
file_index_ = file_index;
|
||||
file_iter_.reset(cfd_->table_cache()->NewIterator(
|
||||
read_options_, *(cfd_->soptions()), cfd_->internal_comparator(),
|
||||
files_[file_index_]->fd, nullptr /* table_reader_ptr */, false));
|
||||
Reset();
|
||||
}
|
||||
valid_ = false;
|
||||
}
|
||||
void Reset() {
|
||||
assert(file_index_ < files_.size());
|
||||
file_iter_.reset(cfd_->table_cache()->NewIterator(
|
||||
read_options_, *(cfd_->soptions()), cfd_->internal_comparator(),
|
||||
files_[file_index_]->fd, nullptr /* table_reader_ptr */, false));
|
||||
}
|
||||
void SeekToLast() override {
|
||||
status_ = Status::NotSupported("LevelIterator::SeekToLast()");
|
||||
valid_ = false;
|
||||
@ -63,12 +68,15 @@ class LevelIterator : public Iterator {
|
||||
assert(file_iter_ != nullptr);
|
||||
file_iter_->Seek(internal_key);
|
||||
valid_ = file_iter_->Valid();
|
||||
assert(valid_);
|
||||
}
|
||||
void Next() override {
|
||||
assert(valid_);
|
||||
file_iter_->Next();
|
||||
while (!file_iter_->Valid()) {
|
||||
for (;;) {
|
||||
if (file_iter_->status().IsIncomplete() || file_iter_->Valid()) {
|
||||
valid_ = !file_iter_->status().IsIncomplete();
|
||||
return;
|
||||
}
|
||||
if (file_index_ + 1 >= files_.size()) {
|
||||
valid_ = false;
|
||||
return;
|
||||
@ -76,7 +84,6 @@ class LevelIterator : public Iterator {
|
||||
SetFileIndex(file_index_ + 1);
|
||||
file_iter_->SeekToFirst();
|
||||
}
|
||||
valid_ = file_iter_->Valid();
|
||||
}
|
||||
Slice key() const override {
|
||||
assert(valid_);
|
||||
@ -160,6 +167,8 @@ void ForwardIterator::SeekToFirst() {
|
||||
if (sv_ == nullptr ||
|
||||
sv_ ->version_number != cfd_->GetSuperVersionNumber()) {
|
||||
RebuildIterators();
|
||||
} else if (status_.IsIncomplete()) {
|
||||
ResetIncompleteIterators();
|
||||
}
|
||||
SeekInternal(Slice(), true);
|
||||
}
|
||||
@ -168,6 +177,8 @@ void ForwardIterator::Seek(const Slice& internal_key) {
|
||||
if (sv_ == nullptr ||
|
||||
sv_ ->version_number != cfd_->GetSuperVersionNumber()) {
|
||||
RebuildIterators();
|
||||
} else if (status_.IsIncomplete()) {
|
||||
ResetIncompleteIterators();
|
||||
}
|
||||
SeekInternal(internal_key, false);
|
||||
}
|
||||
@ -211,7 +222,15 @@ void ForwardIterator::SeekInternal(const Slice& internal_key,
|
||||
}
|
||||
l0_iters_[i]->Seek(internal_key);
|
||||
}
|
||||
if (l0_iters_[i]->Valid()) {
|
||||
|
||||
if (l0_iters_[i]->status().IsIncomplete()) {
|
||||
// if any of the immutable iterators is incomplete (no-io option was
|
||||
// used), we are unable to reliably find the smallest key
|
||||
assert(read_options_.read_tier == kBlockCacheTier);
|
||||
status_ = l0_iters_[i]->status();
|
||||
valid_ = false;
|
||||
return;
|
||||
} else if (l0_iters_[i]->Valid()) {
|
||||
immutable_min_heap_.push(l0_iters_[i]);
|
||||
}
|
||||
}
|
||||
@ -280,7 +299,14 @@ void ForwardIterator::SeekInternal(const Slice& internal_key,
|
||||
level_iters_[level - 1]->SetFileIndex(f_idx);
|
||||
seek_to_first ? level_iters_[level - 1]->SeekToFirst() :
|
||||
level_iters_[level - 1]->Seek(internal_key);
|
||||
if (level_iters_[level - 1]->Valid()) {
|
||||
|
||||
if (level_iters_[level - 1]->status().IsIncomplete()) {
|
||||
// see above
|
||||
assert(read_options_.read_tier == kBlockCacheTier);
|
||||
status_ = level_iters_[level - 1]->status();
|
||||
valid_ = false;
|
||||
return;
|
||||
} else if (level_iters_[level - 1]->Valid()) {
|
||||
immutable_min_heap_.push(level_iters_[level - 1]);
|
||||
}
|
||||
}
|
||||
@ -304,7 +330,7 @@ void ForwardIterator::Next() {
|
||||
assert(valid_);
|
||||
|
||||
if (sv_ == nullptr ||
|
||||
sv_ ->version_number != cfd_->GetSuperVersionNumber()) {
|
||||
sv_->version_number != cfd_->GetSuperVersionNumber()) {
|
||||
std::string current_key = key().ToString();
|
||||
Slice old_key(current_key.data(), current_key.size());
|
||||
|
||||
@ -320,9 +346,17 @@ void ForwardIterator::Next() {
|
||||
}
|
||||
|
||||
current_->Next();
|
||||
if (current_->Valid() && current_ != mutable_iter_) {
|
||||
immutable_min_heap_.push(current_);
|
||||
if (current_ != mutable_iter_) {
|
||||
if (current_->status().IsIncomplete()) {
|
||||
assert(read_options_.read_tier == kBlockCacheTier);
|
||||
status_ = current_->status();
|
||||
valid_ = false;
|
||||
return;
|
||||
} else if (current_->Valid()) {
|
||||
immutable_min_heap_.push(current_);
|
||||
}
|
||||
}
|
||||
|
||||
UpdateCurrent();
|
||||
}
|
||||
|
||||
@ -389,6 +423,29 @@ void ForwardIterator::RebuildIterators() {
|
||||
is_prev_set_ = false;
|
||||
}
|
||||
|
||||
void ForwardIterator::ResetIncompleteIterators() {
|
||||
const auto& l0_files = sv_->current->files_[0];
|
||||
for (uint32_t i = 0; i < l0_iters_.size(); ++i) {
|
||||
assert(i < l0_files.size());
|
||||
if (!l0_iters_[i]->status().IsIncomplete()) {
|
||||
continue;
|
||||
}
|
||||
delete l0_iters_[i];
|
||||
l0_iters_[i] = cfd_->table_cache()->NewIterator(
|
||||
read_options_, *cfd_->soptions(), cfd_->internal_comparator(),
|
||||
l0_files[i]->fd);
|
||||
}
|
||||
|
||||
for (auto* level_iter : level_iters_) {
|
||||
if (level_iter && level_iter->status().IsIncomplete()) {
|
||||
level_iter->Reset();
|
||||
}
|
||||
}
|
||||
|
||||
current_ = nullptr;
|
||||
is_prev_set_ = false;
|
||||
}
|
||||
|
||||
void ForwardIterator::UpdateCurrent() {
|
||||
if (immutable_min_heap_.empty() && !mutable_iter_->Valid()) {
|
||||
current_ = nullptr;
|
||||
@ -417,7 +474,7 @@ void ForwardIterator::UpdateCurrent() {
|
||||
}
|
||||
|
||||
bool ForwardIterator::NeedToSeekImmutable(const Slice& target) {
|
||||
if (!is_prev_set_) {
|
||||
if (!valid_ || !is_prev_set_) {
|
||||
return true;
|
||||
}
|
||||
Slice prev_key = prev_key_.GetKey();
|
||||
|
@ -73,6 +73,7 @@ class ForwardIterator : public Iterator {
|
||||
private:
|
||||
void Cleanup();
|
||||
void RebuildIterators();
|
||||
void ResetIncompleteIterators();
|
||||
void SeekInternal(const Slice& internal_key, bool seek_to_first);
|
||||
void UpdateCurrent();
|
||||
bool NeedToSeekImmutable(const Slice& internal_key);
|
||||
|
@ -257,9 +257,11 @@ bool InternalStats::GetIntProperty(DBPropertyType property_type,
|
||||
cfd_->imm()->current()->GetTotalNumEntries() +
|
||||
current->GetEstimatedActiveKeys();
|
||||
return true;
|
||||
#ifndef ROCKSDB_LITE
|
||||
case kIsFileDeletionEnabled:
|
||||
*value = db->IsFileDeletionsEnabled();
|
||||
return true;
|
||||
#endif
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@ -422,7 +422,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
||||
// Avoiding recording stats for speed.
|
||||
return false;
|
||||
}
|
||||
PERF_TIMER_AUTO(get_from_memtable_time);
|
||||
PERF_TIMER_GUARD(get_from_memtable_time);
|
||||
|
||||
Slice user_key = key.user_key();
|
||||
bool found_final_value = false;
|
||||
@ -452,7 +452,6 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
||||
if (!found_final_value && merge_in_progress) {
|
||||
*s = Status::MergeInProgress("");
|
||||
}
|
||||
PERF_TIMER_STOP(get_from_memtable_time);
|
||||
PERF_COUNTER_ADD(get_from_memtable_count, 1);
|
||||
return found_final_value;
|
||||
}
|
||||
|
@ -556,7 +556,7 @@ public:
|
||||
WritableFile* file,
|
||||
CompressionType compression_type) const;
|
||||
|
||||
virtual Status SanitizeDBOptions(DBOptions* db_opts) const override {
|
||||
virtual Status SanitizeDBOptions(const DBOptions* db_opts) const override {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -299,17 +299,17 @@ class MemTableInserter : public WriteBatch::Handler {
|
||||
public:
|
||||
SequenceNumber sequence_;
|
||||
ColumnFamilyMemTables* cf_mems_;
|
||||
bool recovery_;
|
||||
bool ignore_missing_column_families_;
|
||||
uint64_t log_number_;
|
||||
DBImpl* db_;
|
||||
const bool dont_filter_deletes_;
|
||||
|
||||
MemTableInserter(SequenceNumber sequence, ColumnFamilyMemTables* cf_mems,
|
||||
bool recovery, uint64_t log_number, DB* db,
|
||||
const bool dont_filter_deletes)
|
||||
bool ignore_missing_column_families, uint64_t log_number,
|
||||
DB* db, const bool dont_filter_deletes)
|
||||
: sequence_(sequence),
|
||||
cf_mems_(cf_mems),
|
||||
recovery_(recovery),
|
||||
ignore_missing_column_families_(ignore_missing_column_families),
|
||||
log_number_(log_number),
|
||||
db_(reinterpret_cast<DBImpl*>(db)),
|
||||
dont_filter_deletes_(dont_filter_deletes) {
|
||||
@ -321,12 +321,18 @@ class MemTableInserter : public WriteBatch::Handler {
|
||||
|
||||
bool SeekToColumnFamily(uint32_t column_family_id, Status* s) {
|
||||
bool found = cf_mems_->Seek(column_family_id);
|
||||
if (recovery_ && (!found || log_number_ < cf_mems_->GetLogNumber())) {
|
||||
// if in recovery envoronment:
|
||||
// * If column family was not found, it might mean that the WAL write
|
||||
// batch references to the column family that was dropped after the
|
||||
// insert. We don't want to fail the whole write batch in that case -- we
|
||||
// just ignore the update.
|
||||
if (!found) {
|
||||
if (ignore_missing_column_families_) {
|
||||
*s = Status::OK();
|
||||
} else {
|
||||
*s = Status::InvalidArgument(
|
||||
"Invalid column family specified in write batch");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
if (log_number_ != 0 && log_number_ < cf_mems_->GetLogNumber()) {
|
||||
// This is true only in recovery environment (log_number_ is always 0 in
|
||||
// non-recovery, regular write code-path)
|
||||
// * If log_number_ < cf_mems_->GetLogNumber(), this means that column
|
||||
// family already contains updates from this log. We can't apply updates
|
||||
// twice because of update-in-place or merge workloads -- ignore the
|
||||
@ -334,18 +340,8 @@ class MemTableInserter : public WriteBatch::Handler {
|
||||
*s = Status::OK();
|
||||
return false;
|
||||
}
|
||||
if (!found) {
|
||||
assert(!recovery_);
|
||||
// If the column family was not found in non-recovery enviornment
|
||||
// (client's write code-path), we have to fail the write and return
|
||||
// the failure status to the client.
|
||||
*s = Status::InvalidArgument(
|
||||
"Invalid column family specified in write batch");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual Status PutCF(uint32_t column_family_id, const Slice& key,
|
||||
const Slice& value) {
|
||||
Status seek_status;
|
||||
@ -503,10 +499,12 @@ class MemTableInserter : public WriteBatch::Handler {
|
||||
|
||||
Status WriteBatchInternal::InsertInto(const WriteBatch* b,
|
||||
ColumnFamilyMemTables* memtables,
|
||||
bool recovery, uint64_t log_number,
|
||||
DB* db, const bool dont_filter_deletes) {
|
||||
bool ignore_missing_column_families,
|
||||
uint64_t log_number, DB* db,
|
||||
const bool dont_filter_deletes) {
|
||||
MemTableInserter inserter(WriteBatchInternal::Sequence(b), memtables,
|
||||
recovery, log_number, db, dont_filter_deletes);
|
||||
ignore_missing_column_families, log_number, db,
|
||||
dont_filter_deletes);
|
||||
return b->Iterate(&inserter);
|
||||
}
|
||||
|
||||
|
@ -106,18 +106,18 @@ class WriteBatchInternal {
|
||||
// Inserts batch entries into memtable
|
||||
// If dont_filter_deletes is false AND options.filter_deletes is true,
|
||||
// then --> Drops deletes in batch if db->KeyMayExist returns false
|
||||
// If recovery == true, this means InsertInto is executed on a recovery
|
||||
// code-path. WriteBatch referencing a dropped column family can be
|
||||
// found on a recovery code-path and should be ignored (recovery should not
|
||||
// fail). Additionally, the memtable will be updated only if
|
||||
// If ignore_missing_column_families == true. WriteBatch referencing
|
||||
// non-existing column family should be ignored.
|
||||
// However, if ignore_missing_column_families == false, any WriteBatch
|
||||
// referencing non-existing column family will return a InvalidArgument()
|
||||
// failure.
|
||||
//
|
||||
// If log_number is non-zero, the memtable will be updated only if
|
||||
// memtables->GetLogNumber() >= log_number
|
||||
// However, if recovery == false, any WriteBatch referencing
|
||||
// non-existing column family will return a failure. Also, log_number is
|
||||
// ignored in that case
|
||||
static Status InsertInto(const WriteBatch* batch,
|
||||
ColumnFamilyMemTables* memtables,
|
||||
bool recovery = false, uint64_t log_number = 0,
|
||||
DB* db = nullptr,
|
||||
bool ignore_missing_column_families = false,
|
||||
uint64_t log_number = 0, DB* db = nullptr,
|
||||
const bool dont_filter_deletes = true);
|
||||
|
||||
static void Append(WriteBatch* dst, const WriteBatch* src);
|
||||
|
@ -27,7 +27,9 @@ struct IOStatsContext {
|
||||
uint64_t bytes_read;
|
||||
};
|
||||
|
||||
#ifndef IOS_CROSS_COMPILE
|
||||
extern __thread IOStatsContext iostats_context;
|
||||
#endif // IOS_CROSS_COMPILE
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
|
@ -959,7 +959,17 @@ struct WriteOptions {
|
||||
// Default: 0
|
||||
uint64_t timeout_hint_us;
|
||||
|
||||
WriteOptions() : sync(false), disableWAL(false), timeout_hint_us(0) {}
|
||||
// If true and if user is trying to write to column families that don't exist
|
||||
// (they were dropped), ignore the write (don't return an error). If there
|
||||
// are multiple writes in a WriteBatch, other writes will succeed.
|
||||
// Default: false
|
||||
bool ignore_missing_column_families;
|
||||
|
||||
WriteOptions()
|
||||
: sync(false),
|
||||
disableWAL(false),
|
||||
timeout_hint_us(0),
|
||||
ignore_missing_column_families(false) {}
|
||||
};
|
||||
|
||||
// Options that control flush operations
|
||||
|
@ -96,7 +96,7 @@ class Status {
|
||||
// Returns true iff the status indicates Incomplete
|
||||
bool IsIncomplete() const { return code() == kIncomplete; }
|
||||
|
||||
// Returns true iff the status indicates Incomplete
|
||||
// Returns true iff the status indicates Shutdown In progress
|
||||
bool IsShutdownInProgress() const { return code() == kShutdownInProgress; }
|
||||
|
||||
bool IsTimedOut() const { return code() == kTimedOut; }
|
||||
|
@ -227,15 +227,46 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
|
||||
PlainTableOptions());
|
||||
|
||||
struct CuckooTablePropertyNames {
|
||||
// The key that is used to fill empty buckets.
|
||||
static const std::string kEmptyKey;
|
||||
// Fixed length of value.
|
||||
static const std::string kValueLength;
|
||||
static const std::string kNumHashTable;
|
||||
static const std::string kMaxNumBuckets;
|
||||
// Number of hash functions used in Cuckoo Hash.
|
||||
static const std::string kNumHashFunc;
|
||||
// It denotes the number of buckets in a Cuckoo Block. Given a key and a
|
||||
// particular hash function, a Cuckoo Block is a set of consecutive buckets,
|
||||
// where starting bucket id is given by the hash function on the key. In case
|
||||
// of a collision during inserting the key, the builder tries to insert the
|
||||
// key in other locations of the cuckoo block before using the next hash
|
||||
// function. This reduces cache miss during read operation in case of
|
||||
// collision.
|
||||
static const std::string kCuckooBlockSize;
|
||||
// Size of the hash table. Use this number to compute the modulo of hash
|
||||
// function. The actual number of buckets will be kMaxHashTableSize +
|
||||
// kCuckooBlockSize - 1. The last kCuckooBlockSize-1 buckets are used to
|
||||
// accommodate the Cuckoo Block from end of hash table, due to cache friendly
|
||||
// implementation.
|
||||
static const std::string kHashTableSize;
|
||||
// Denotes if the key sorted in the file is Internal Key (if false)
|
||||
// or User Key only (if true).
|
||||
static const std::string kIsLastLevel;
|
||||
};
|
||||
|
||||
// Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing
|
||||
// @hash_table_ratio: Determines the utilization of hash tables. Smaller values
|
||||
// result in larger hash tables with fewer collisions.
|
||||
// @max_search_depth: A property used by builder to determine the depth to go to
|
||||
// to search for a path to displace elements in case of
|
||||
// collision. See Builder.MakeSpaceForKey method. Higher
|
||||
// values result in more efficient hash tables with fewer
|
||||
// lookups but take more time to build.
|
||||
// @cuckoo_block_size: In case of collision while inserting, the builder
|
||||
// attempts to insert in the next cuckoo_block_size
|
||||
// locations before skipping over to the next Cuckoo hash
|
||||
// function. This makes lookups more cache friendly in case
|
||||
// of collisions.
|
||||
extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9,
|
||||
uint32_t max_search_depth = 100);
|
||||
uint32_t max_search_depth = 100, uint32_t cuckoo_block_size = 5);
|
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
@ -300,7 +331,7 @@ class TableFactory {
|
||||
//
|
||||
// If the function cannot find a way to sanitize the input DB Options,
|
||||
// a non-ok Status will be returned.
|
||||
virtual Status SanitizeDBOptions(DBOptions* db_opts) const = 0;
|
||||
virtual Status SanitizeDBOptions(const DBOptions* db_opts) const = 0;
|
||||
|
||||
// Return a string that contains printable format of table configurations.
|
||||
// RocksDB prints configurations at DB Open().
|
||||
|
@ -43,7 +43,7 @@ class AdaptiveTableFactory : public TableFactory {
|
||||
override;
|
||||
|
||||
// Sanitizes the specified DB Options.
|
||||
Status SanitizeDBOptions(DBOptions* db_opts) const override {
|
||||
Status SanitizeDBOptions(const DBOptions* db_opts) const override {
|
||||
if (db_opts->allow_mmap_reads == false) {
|
||||
return Status::NotSupported(
|
||||
"AdaptiveTable with allow_mmap_reads == false is not supported.");
|
||||
|
@ -116,7 +116,7 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
||||
public:
|
||||
explicit ShortenedIndexBuilder(const Comparator* comparator)
|
||||
: IndexBuilder(comparator),
|
||||
index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
|
||||
index_block_builder_(1 /* block_restart_interval == 1 */) {}
|
||||
|
||||
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
||||
const Slice* first_key_in_next_block,
|
||||
@ -420,7 +420,7 @@ struct BlockBasedTableBuilder::Rep {
|
||||
table_options(table_opt),
|
||||
internal_comparator(icomparator),
|
||||
file(f),
|
||||
data_block(table_options.block_restart_interval, &internal_comparator),
|
||||
data_block(table_options.block_restart_interval),
|
||||
internal_prefix_transform(options.prefix_extractor.get()),
|
||||
index_builder(CreateIndexBuilder(
|
||||
table_options.index_type, &internal_comparator,
|
||||
@ -492,7 +492,7 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
}
|
||||
|
||||
if (r->filter_block != nullptr) {
|
||||
r->filter_block->AddKey(key);
|
||||
r->filter_block->AddKey(ExtractUserKey(key));
|
||||
}
|
||||
|
||||
r->last_key.assign(key.data(), key.size());
|
||||
|
@ -38,10 +38,6 @@ BlockBasedTableFactory::BlockBasedTableFactory(
|
||||
table_options_.block_size_deviation > 100) {
|
||||
table_options_.block_size_deviation = 0;
|
||||
}
|
||||
if (table_options_.filter_policy) {
|
||||
auto* p = new InternalFilterPolicy(table_options_.filter_policy);
|
||||
table_options_.filter_policy.reset(p);
|
||||
}
|
||||
}
|
||||
|
||||
Status BlockBasedTableFactory::NewTableReader(
|
||||
|
@ -45,7 +45,7 @@ class BlockBasedTableFactory : public TableFactory {
|
||||
WritableFile* file, CompressionType compression_type) const override;
|
||||
|
||||
// Sanitizes the specified DB Options.
|
||||
Status SanitizeDBOptions(DBOptions* db_opts) const override {
|
||||
Status SanitizeDBOptions(const DBOptions* db_opts) const override {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -1067,9 +1067,8 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
|
||||
s = handle.DecodeFrom(&handle_value);
|
||||
assert(s.ok());
|
||||
auto filter_entry = GetFilter(true /* no io */);
|
||||
may_match =
|
||||
filter_entry.value == nullptr ||
|
||||
filter_entry.value->PrefixMayMatch(handle.offset(), internal_prefix);
|
||||
may_match = filter_entry.value == nullptr ||
|
||||
filter_entry.value->PrefixMayMatch(handle.offset(), prefix);
|
||||
filter_entry.Release(rep_->table_options.block_cache.get());
|
||||
}
|
||||
|
||||
@ -1105,9 +1104,8 @@ Status BlockBasedTable::Get(
|
||||
|
||||
BlockHandle handle;
|
||||
bool may_not_exist_in_filter =
|
||||
filter != nullptr &&
|
||||
handle.DecodeFrom(&handle_value).ok() &&
|
||||
!filter->KeyMayMatch(handle.offset(), key);
|
||||
filter != nullptr && handle.DecodeFrom(&handle_value).ok() &&
|
||||
!filter->KeyMayMatch(handle.offset(), ExtractUserKey(key));
|
||||
|
||||
if (may_not_exist_in_filter) {
|
||||
// Not found
|
||||
|
@ -41,10 +41,8 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
BlockBuilder::BlockBuilder(int block_restart_interval,
|
||||
const Comparator* comparator)
|
||||
BlockBuilder::BlockBuilder(int block_restart_interval)
|
||||
: block_restart_interval_(block_restart_interval),
|
||||
comparator_(comparator),
|
||||
restarts_(),
|
||||
counter_(0),
|
||||
finished_(false) {
|
||||
@ -96,8 +94,6 @@ void BlockBuilder::Add(const Slice& key, const Slice& value) {
|
||||
Slice last_key_piece(last_key_);
|
||||
assert(!finished_);
|
||||
assert(counter_ <= block_restart_interval_);
|
||||
assert(buffer_.empty() // No values yet?
|
||||
|| comparator_->Compare(key, last_key_piece) > 0);
|
||||
size_t shared = 0;
|
||||
if (counter_ < block_restart_interval_) {
|
||||
// See how much sharing to do with previous string
|
||||
|
@ -22,7 +22,7 @@ class BlockBuilder {
|
||||
BlockBuilder(const BlockBuilder&) = delete;
|
||||
void operator=(const BlockBuilder&) = delete;
|
||||
|
||||
BlockBuilder(int block_restart_interval, const Comparator* comparator);
|
||||
explicit BlockBuilder(int block_restart_interval);
|
||||
|
||||
// Reset the contents as if the BlockBuilder was just constructed.
|
||||
void Reset();
|
||||
@ -50,7 +50,6 @@ class BlockBuilder {
|
||||
|
||||
private:
|
||||
const int block_restart_interval_;
|
||||
const Comparator* comparator_;
|
||||
|
||||
std::string buffer_; // Destination buffer
|
||||
std::vector<uint32_t> restarts_; // Restart points
|
||||
|
@ -76,7 +76,7 @@ TEST(BlockTest, SimpleTest) {
|
||||
|
||||
std::vector<std::string> keys;
|
||||
std::vector<std::string> values;
|
||||
BlockBuilder builder(16, ic.get());
|
||||
BlockBuilder builder(16);
|
||||
int num_records = 100000;
|
||||
|
||||
GenerateRandomKVs(&keys, &values, 0, num_records);
|
||||
@ -132,8 +132,7 @@ BlockContents GetBlockContents(std::unique_ptr<BlockBuilder> *builder,
|
||||
const std::vector<std::string> &keys,
|
||||
const std::vector<std::string> &values,
|
||||
const int prefix_group_size = 1) {
|
||||
builder->reset(
|
||||
new BlockBuilder(1 /* restart interval */, BytewiseComparator()));
|
||||
builder->reset(new BlockBuilder(1 /* restart interval */));
|
||||
|
||||
// Add only half of the keys
|
||||
for (size_t i = 0; i < keys.size(); ++i) {
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/table.h"
|
||||
#include "table/block_builder.h"
|
||||
#include "table/cuckoo_table_factory.h"
|
||||
#include "table/format.h"
|
||||
#include "table/meta_blocks.h"
|
||||
#include "util/autovector.h"
|
||||
@ -24,28 +25,32 @@
|
||||
namespace rocksdb {
|
||||
const std::string CuckooTablePropertyNames::kEmptyKey =
|
||||
"rocksdb.cuckoo.bucket.empty.key";
|
||||
const std::string CuckooTablePropertyNames::kNumHashTable =
|
||||
const std::string CuckooTablePropertyNames::kNumHashFunc =
|
||||
"rocksdb.cuckoo.hash.num";
|
||||
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
|
||||
"rocksdb.cuckoo.bucket.maxnum";
|
||||
const std::string CuckooTablePropertyNames::kHashTableSize =
|
||||
"rocksdb.cuckoo.hash.size";
|
||||
const std::string CuckooTablePropertyNames::kValueLength =
|
||||
"rocksdb.cuckoo.value.length";
|
||||
const std::string CuckooTablePropertyNames::kIsLastLevel =
|
||||
"rocksdb.cuckoo.file.islastlevel";
|
||||
const std::string CuckooTablePropertyNames::kCuckooBlockSize =
|
||||
"rocksdb.cuckoo.hash.cuckooblocksize";
|
||||
|
||||
// Obtained by running echo rocksdb.table.cuckoo | sha1sum
|
||||
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
|
||||
|
||||
CuckooTableBuilder::CuckooTableBuilder(
|
||||
WritableFile* file, double hash_table_ratio,
|
||||
WritableFile* file, double max_hash_table_ratio,
|
||||
uint32_t max_num_hash_table, uint32_t max_search_depth,
|
||||
const Comparator* user_comparator,
|
||||
const Comparator* user_comparator, uint32_t cuckoo_block_size,
|
||||
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
|
||||
: num_hash_table_(2),
|
||||
: num_hash_func_(2),
|
||||
file_(file),
|
||||
hash_table_ratio_(hash_table_ratio),
|
||||
max_num_hash_table_(max_num_hash_table),
|
||||
max_hash_table_ratio_(max_hash_table_ratio),
|
||||
max_num_hash_func_(max_num_hash_table),
|
||||
max_search_depth_(max_search_depth),
|
||||
cuckoo_block_size_(std::max(1U, cuckoo_block_size)),
|
||||
hash_table_size_(2),
|
||||
is_last_level_file_(false),
|
||||
has_seen_first_key_(false),
|
||||
ucomp_(user_comparator),
|
||||
@ -86,7 +91,6 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
} else {
|
||||
kvs_.emplace_back(std::make_pair(key.ToString(), value.ToString()));
|
||||
}
|
||||
properties_.num_entries++;
|
||||
|
||||
// In order to fill the empty buckets in the hash table, we identify a
|
||||
// key which is not used so far (unused_user_key). We determine this by
|
||||
@ -98,11 +102,14 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
|
||||
} else if (ikey.user_key.compare(largest_user_key_) > 0) {
|
||||
largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size());
|
||||
}
|
||||
if (hash_table_size_ < kvs_.size() / max_hash_table_ratio_) {
|
||||
hash_table_size_ *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
|
||||
uint64_t num_buckets = kvs_.size() / hash_table_ratio_;
|
||||
buckets->resize(num_buckets);
|
||||
uint64_t hash_table_size_minus_one = hash_table_size_ - 1;
|
||||
buckets->resize(hash_table_size_minus_one + cuckoo_block_size_);
|
||||
uint64_t make_space_for_key_call_id = 0;
|
||||
for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) {
|
||||
uint64_t bucket_id;
|
||||
@ -110,39 +117,50 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
|
||||
autovector<uint64_t> hash_vals;
|
||||
Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first :
|
||||
ExtractUserKey(kvs_[vector_idx].first);
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets);
|
||||
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
|
||||
bucket_id = hash_val;
|
||||
bucket_found = true;
|
||||
break;
|
||||
} else {
|
||||
if (ucomp_->Compare(user_key, is_last_level_file_
|
||||
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
|
||||
: ExtractUserKey(
|
||||
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
|
||||
return Status::NotSupported("Same key is being inserted again.");
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found;
|
||||
++hash_cnt) {
|
||||
uint64_t hash_val = CuckooHash(user_key, hash_cnt,
|
||||
hash_table_size_minus_one, get_slice_hash_);
|
||||
// If there is a collision, check next cuckoo_block_size_ locations for
|
||||
// empty locations. While checking, if we reach end of the hash table,
|
||||
// stop searching and proceed for next hash function.
|
||||
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
|
||||
++block_idx, ++hash_val) {
|
||||
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
|
||||
bucket_id = hash_val;
|
||||
bucket_found = true;
|
||||
break;
|
||||
} else {
|
||||
if (ucomp_->Compare(user_key, is_last_level_file_
|
||||
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
|
||||
: ExtractUserKey(
|
||||
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
|
||||
return Status::NotSupported("Same key is being inserted again.");
|
||||
}
|
||||
hash_vals.push_back(hash_val);
|
||||
}
|
||||
hash_vals.push_back(hash_val);
|
||||
}
|
||||
}
|
||||
while (!bucket_found && !MakeSpaceForKey(hash_vals,
|
||||
++make_space_for_key_call_id, buckets, &bucket_id)) {
|
||||
// Rehash by increashing number of hash tables.
|
||||
if (num_hash_table_ >= max_num_hash_table_) {
|
||||
return Status::NotSupported("Too many collissions. Unable to hash.");
|
||||
if (num_hash_func_ >= max_num_hash_func_) {
|
||||
return Status::NotSupported("Too many collisions. Unable to hash.");
|
||||
}
|
||||
// We don't really need to rehash the entire table because old hashes are
|
||||
// still valid and we only increased the number of hash functions.
|
||||
uint64_t hash_val = get_slice_hash_(user_key,
|
||||
num_hash_table_, num_buckets);
|
||||
++num_hash_table_;
|
||||
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
|
||||
bucket_found = true;
|
||||
bucket_id = hash_val;
|
||||
break;
|
||||
} else {
|
||||
hash_vals.push_back(hash_val);
|
||||
uint64_t hash_val = CuckooHash(user_key, num_hash_func_,
|
||||
hash_table_size_minus_one, get_slice_hash_);
|
||||
++num_hash_func_;
|
||||
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
|
||||
++block_idx, ++hash_val) {
|
||||
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
|
||||
bucket_found = true;
|
||||
bucket_id = hash_val;
|
||||
break;
|
||||
} else {
|
||||
hash_vals.push_back(hash_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
(*buckets)[bucket_id].vector_idx = vector_idx;
|
||||
@ -154,13 +172,14 @@ Status CuckooTableBuilder::Finish() {
|
||||
assert(!closed_);
|
||||
closed_ = true;
|
||||
std::vector<CuckooBucket> buckets;
|
||||
Status s = MakeHashTable(&buckets);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
// Determine unused_user_key to fill empty buckets.
|
||||
Status s;
|
||||
std::string unused_bucket;
|
||||
if (!kvs_.empty()) {
|
||||
s = MakeHashTable(&buckets);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
// Determine unused_user_key to fill empty buckets.
|
||||
std::string unused_user_key = smallest_user_key_;
|
||||
int curr_pos = unused_user_key.size() - 1;
|
||||
while (curr_pos >= 0) {
|
||||
@ -192,6 +211,7 @@ Status CuckooTableBuilder::Finish() {
|
||||
AppendInternalKey(&unused_bucket, ikey);
|
||||
}
|
||||
}
|
||||
properties_.num_entries = kvs_.size();
|
||||
properties_.fixed_key_len = unused_bucket.size();
|
||||
uint32_t value_length = kvs_.empty() ? 0 : kvs_[0].second.size();
|
||||
uint32_t bucket_size = value_length + properties_.fixed_key_len;
|
||||
@ -226,16 +246,22 @@ Status CuckooTableBuilder::Finish() {
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kNumHashTable].assign(
|
||||
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
|
||||
uint64_t num_buckets = buckets.size();
|
||||
CuckooTablePropertyNames::kNumHashFunc].assign(
|
||||
reinterpret_cast<char*>(&num_hash_func_), sizeof(num_hash_func_));
|
||||
|
||||
uint64_t hash_table_size = buckets.size() - cuckoo_block_size_ + 1;
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kMaxNumBuckets].assign(
|
||||
reinterpret_cast<const char*>(&num_buckets), sizeof(num_buckets));
|
||||
CuckooTablePropertyNames::kHashTableSize].assign(
|
||||
reinterpret_cast<const char*>(&hash_table_size),
|
||||
sizeof(hash_table_size));
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kIsLastLevel].assign(
|
||||
reinterpret_cast<const char*>(&is_last_level_file_),
|
||||
sizeof(is_last_level_file_));
|
||||
properties_.user_collected_properties[
|
||||
CuckooTablePropertyNames::kCuckooBlockSize].assign(
|
||||
reinterpret_cast<const char*>(&cuckoo_block_size_),
|
||||
sizeof(cuckoo_block_size_));
|
||||
|
||||
// Write meta blocks.
|
||||
MetaIndexBuilder meta_index_builder;
|
||||
@ -279,7 +305,7 @@ void CuckooTableBuilder::Abandon() {
|
||||
}
|
||||
|
||||
uint64_t CuckooTableBuilder::NumEntries() const {
|
||||
return properties_.num_entries;
|
||||
return kvs_.size();
|
||||
}
|
||||
|
||||
uint64_t CuckooTableBuilder::FileSize() const {
|
||||
@ -288,11 +314,17 @@ uint64_t CuckooTableBuilder::FileSize() const {
|
||||
} else if (properties_.num_entries == 0) {
|
||||
return 0;
|
||||
}
|
||||
// This is not the actual size of the file as we need to account for
|
||||
// hash table ratio. This returns the size of filled buckets in the table
|
||||
// scaled up by a factor of 1/hash_table_ratio.
|
||||
return ((kvs_[0].first.size() + kvs_[0].second.size()) *
|
||||
properties_.num_entries) / hash_table_ratio_;
|
||||
|
||||
// Account for buckets being a power of two.
|
||||
// As elements are added, file size remains constant for a while and doubles
|
||||
// its size. Since compaction algorithm stops adding elements only after it
|
||||
// exceeds the file limit, we account for the extra element being added here.
|
||||
uint64_t expected_hash_table_size = hash_table_size_;
|
||||
if (expected_hash_table_size < (kvs_.size() + 1) / max_hash_table_ratio_) {
|
||||
expected_hash_table_size *= 2;
|
||||
}
|
||||
return (kvs_[0].first.size() + kvs_[0].second.size()) *
|
||||
expected_hash_table_size;
|
||||
}
|
||||
|
||||
// This method is invoked when there is no place to insert the target key.
|
||||
@ -322,17 +354,19 @@ bool CuckooTableBuilder::MakeSpaceForKey(
|
||||
std::vector<CuckooNode> tree;
|
||||
// We want to identify already visited buckets in the current method call so
|
||||
// that we don't add same buckets again for exploration in the tree.
|
||||
// We do this by maintaining a count of current method call, which acts as a
|
||||
// unique id for this invocation of the method. We store this number into
|
||||
// the nodes that we explore in current method call.
|
||||
// We do this by maintaining a count of current method call in
|
||||
// make_space_for_key_call_id, which acts as a unique id for this invocation
|
||||
// of the method. We store this number into the nodes that we explore in
|
||||
// current method call.
|
||||
// It is unlikely for the increment operation to overflow because the maximum
|
||||
// no. of times this will be called is <= max_num_hash_table_ + kvs_.size().
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
// no. of times this will be called is <= max_num_hash_func_ + kvs_.size().
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
|
||||
uint64_t bucket_id = hash_vals[hash_cnt];
|
||||
(*buckets)[bucket_id].make_space_for_key_call_id =
|
||||
make_space_for_key_call_id;
|
||||
tree.push_back(CuckooNode(bucket_id, 0, 0));
|
||||
}
|
||||
uint64_t hash_table_size_minus_one = hash_table_size_ - 1;
|
||||
bool null_found = false;
|
||||
uint32_t curr_pos = 0;
|
||||
while (!null_found && curr_pos < tree.size()) {
|
||||
@ -342,22 +376,27 @@ bool CuckooTableBuilder::MakeSpaceForKey(
|
||||
break;
|
||||
}
|
||||
CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id];
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
|
||||
uint64_t child_bucket_id = get_slice_hash_(
|
||||
is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first
|
||||
: ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first)),
|
||||
hash_cnt, buckets->size());
|
||||
if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
|
||||
make_space_for_key_call_id) {
|
||||
continue;
|
||||
}
|
||||
(*buckets)[child_bucket_id].make_space_for_key_call_id =
|
||||
make_space_for_key_call_id;
|
||||
tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1,
|
||||
curr_pos));
|
||||
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
|
||||
null_found = true;
|
||||
break;
|
||||
for (uint32_t hash_cnt = 0;
|
||||
hash_cnt < num_hash_func_ && !null_found; ++hash_cnt) {
|
||||
uint64_t child_bucket_id = CuckooHash(
|
||||
(is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first :
|
||||
ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first))),
|
||||
hash_cnt, hash_table_size_minus_one, get_slice_hash_);
|
||||
// Iterate inside Cuckoo Block.
|
||||
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
|
||||
++block_idx, ++child_bucket_id) {
|
||||
if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
|
||||
make_space_for_key_call_id) {
|
||||
continue;
|
||||
}
|
||||
(*buckets)[child_bucket_id].make_space_for_key_call_id =
|
||||
make_space_for_key_call_id;
|
||||
tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1,
|
||||
curr_pos));
|
||||
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
|
||||
null_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
++curr_pos;
|
||||
@ -367,10 +406,10 @@ bool CuckooTableBuilder::MakeSpaceForKey(
|
||||
// There is an empty node in tree.back(). Now, traverse the path from this
|
||||
// empty node to top of the tree and at every node in the path, replace
|
||||
// child with the parent. Stop when first level is reached in the tree
|
||||
// (happens when 0 <= bucket_to_replace_pos < num_hash_table_) and return
|
||||
// (happens when 0 <= bucket_to_replace_pos < num_hash_func_) and return
|
||||
// this location in first level for target key to be inserted.
|
||||
uint32_t bucket_to_replace_pos = tree.size()-1;
|
||||
while (bucket_to_replace_pos >= num_hash_table_) {
|
||||
while (bucket_to_replace_pos >= num_hash_func_) {
|
||||
CuckooNode& curr_node = tree[bucket_to_replace_pos];
|
||||
(*buckets)[curr_node.bucket_id] =
|
||||
(*buckets)[tree[curr_node.parent_pos].bucket_id];
|
||||
|
@ -21,8 +21,9 @@ namespace rocksdb {
|
||||
class CuckooTableBuilder: public TableBuilder {
|
||||
public:
|
||||
CuckooTableBuilder(
|
||||
WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table,
|
||||
uint32_t max_search_depth, const Comparator* user_comparator,
|
||||
WritableFile* file, double max_hash_table_ratio,
|
||||
uint32_t max_num_hash_func, uint32_t max_search_depth,
|
||||
const Comparator* user_comparator, uint32_t cuckoo_block_size,
|
||||
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
|
||||
|
||||
// REQUIRES: Either Finish() or Abandon() has been called.
|
||||
@ -60,7 +61,7 @@ class CuckooTableBuilder: public TableBuilder {
|
||||
CuckooBucket()
|
||||
: vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {}
|
||||
uint32_t vector_idx;
|
||||
// This number will not exceed kvs_.size() + max_num_hash_table_.
|
||||
// This number will not exceed kvs_.size() + max_num_hash_func_.
|
||||
// We assume number of items is <= 2^32.
|
||||
uint32_t make_space_for_key_call_id;
|
||||
};
|
||||
@ -73,11 +74,13 @@ class CuckooTableBuilder: public TableBuilder {
|
||||
uint64_t* bucket_id);
|
||||
Status MakeHashTable(std::vector<CuckooBucket>* buckets);
|
||||
|
||||
uint32_t num_hash_table_;
|
||||
uint32_t num_hash_func_;
|
||||
WritableFile* file_;
|
||||
const double hash_table_ratio_;
|
||||
const uint32_t max_num_hash_table_;
|
||||
const double max_hash_table_ratio_;
|
||||
const uint32_t max_num_hash_func_;
|
||||
const uint32_t max_search_depth_;
|
||||
const uint32_t cuckoo_block_size_;
|
||||
uint64_t hash_table_size_;
|
||||
bool is_last_level_file_;
|
||||
Status status_;
|
||||
std::vector<std::pair<std::string, std::string>> kvs_;
|
||||
|
@ -37,8 +37,9 @@ class CuckooBuilderTest {
|
||||
void CheckFileContents(const std::vector<std::string>& keys,
|
||||
const std::vector<std::string>& values,
|
||||
const std::vector<uint64_t>& expected_locations,
|
||||
std::string expected_unused_bucket, uint64_t expected_max_buckets,
|
||||
uint32_t expected_num_hash_fun, bool expected_is_last_level) {
|
||||
std::string expected_unused_bucket, uint64_t expected_table_size,
|
||||
uint32_t expected_num_hash_func, bool expected_is_last_level,
|
||||
uint32_t expected_cuckoo_block_size = 1) {
|
||||
// Read file
|
||||
unique_ptr<RandomAccessFile> read_file;
|
||||
ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
|
||||
@ -51,7 +52,8 @@ class CuckooBuilderTest {
|
||||
kCuckooTableMagicNumber, env_, nullptr, &props));
|
||||
ASSERT_EQ(props->num_entries, keys.size());
|
||||
ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size());
|
||||
ASSERT_EQ(props->data_size, keys.size()*expected_unused_bucket.size());
|
||||
ASSERT_EQ(props->data_size, expected_unused_bucket.size() *
|
||||
(expected_table_size + expected_cuckoo_block_size - 1));
|
||||
ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len);
|
||||
|
||||
// Check unused bucket.
|
||||
@ -65,14 +67,18 @@ class CuckooBuilderTest {
|
||||
CuckooTablePropertyNames::kValueLength].data());
|
||||
ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found);
|
||||
ASSERT_EQ(props->raw_value_size, values.size()*value_len_found);
|
||||
const uint64_t max_buckets =
|
||||
const uint64_t table_size =
|
||||
*reinterpret_cast<const uint64_t*>(props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kMaxNumBuckets].data());
|
||||
ASSERT_EQ(expected_max_buckets, max_buckets);
|
||||
const uint32_t num_hash_fun_found =
|
||||
CuckooTablePropertyNames::kHashTableSize].data());
|
||||
ASSERT_EQ(expected_table_size, table_size);
|
||||
const uint32_t num_hash_func_found =
|
||||
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kNumHashTable].data());
|
||||
ASSERT_EQ(expected_num_hash_fun, num_hash_fun_found);
|
||||
CuckooTablePropertyNames::kNumHashFunc].data());
|
||||
ASSERT_EQ(expected_num_hash_func, num_hash_func_found);
|
||||
const uint32_t cuckoo_block_size =
|
||||
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kCuckooBlockSize].data());
|
||||
ASSERT_EQ(expected_cuckoo_block_size, cuckoo_block_size);
|
||||
const bool is_last_level_found =
|
||||
*reinterpret_cast<const bool*>(props->user_collected_properties[
|
||||
CuckooTablePropertyNames::kIsLastLevel].data());
|
||||
@ -82,7 +88,7 @@ class CuckooBuilderTest {
|
||||
// Check contents of the bucket.
|
||||
std::vector<bool> keys_found(keys.size(), false);
|
||||
uint32_t bucket_size = expected_unused_bucket.size();
|
||||
for (uint32_t i = 0; i < max_buckets; ++i) {
|
||||
for (uint32_t i = 0; i < table_size + cuckoo_block_size - 1; ++i) {
|
||||
Slice read_slice;
|
||||
ASSERT_OK(read_file->Read(i*bucket_size, bucket_size,
|
||||
&read_slice, nullptr));
|
||||
@ -108,6 +114,14 @@ class CuckooBuilderTest {
|
||||
return ikey.GetKey().ToString();
|
||||
}
|
||||
|
||||
uint64_t NextPowOf2(uint64_t num) {
|
||||
uint64_t n = 2;
|
||||
while (n <= num) {
|
||||
n *= 2;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
Env* env_;
|
||||
EnvOptions env_options_;
|
||||
std::string fname;
|
||||
@ -116,10 +130,10 @@ class CuckooBuilderTest {
|
||||
|
||||
TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
|
||||
unique_ptr<WritableFile> writable_file;
|
||||
fname = test::TmpDir() + "/NoCollisionFullKey";
|
||||
fname = test::TmpDir() + "/EmptyFile";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
4, 100, BytewiseComparator(), GetSliceHash);
|
||||
4, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_OK(writable_file->Close());
|
||||
@ -146,7 +160,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
|
||||
fname = test::TmpDir() + "/NoCollisionFullKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
@ -156,11 +170,11 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_OK(writable_file->Close());
|
||||
|
||||
uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
|
||||
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
|
||||
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
||||
expected_unused_bucket += std::string(values[0].size(), 'a');
|
||||
CheckFileContents(keys, values, expected_locations,
|
||||
expected_unused_bucket, expected_max_buckets, 2, false);
|
||||
expected_unused_bucket, expected_table_size, 2, false);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
|
||||
@ -183,7 +197,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
|
||||
fname = test::TmpDir() + "/WithCollisionFullKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
@ -193,11 +207,49 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_OK(writable_file->Close());
|
||||
|
||||
uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
|
||||
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
|
||||
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
||||
expected_unused_bucket += std::string(values[0].size(), 'a');
|
||||
CheckFileContents(keys, values, expected_locations,
|
||||
expected_unused_bucket, expected_max_buckets, 4, false);
|
||||
expected_unused_bucket, expected_table_size, 4, false);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
|
||||
uint32_t num_hash_fun = 4;
|
||||
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
|
||||
std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
|
||||
hash_map = {
|
||||
{user_keys[0], {0, 1, 2, 3}},
|
||||
{user_keys[1], {0, 1, 2, 3}},
|
||||
{user_keys[2], {0, 1, 2, 3}},
|
||||
{user_keys[3], {0, 1, 2, 3}},
|
||||
};
|
||||
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
|
||||
std::vector<std::string> keys;
|
||||
for (auto& user_key : user_keys) {
|
||||
keys.push_back(GetInternalKey(user_key, false));
|
||||
}
|
||||
|
||||
unique_ptr<WritableFile> writable_file;
|
||||
uint32_t cuckoo_block_size = 2;
|
||||
fname = test::TmpDir() + "/WithCollisionFullKey2";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
ASSERT_EQ(builder.NumEntries(), i + 1);
|
||||
ASSERT_OK(builder.status());
|
||||
}
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_OK(writable_file->Close());
|
||||
|
||||
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
|
||||
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
||||
expected_unused_bucket += std::string(values[0].size(), 'a');
|
||||
CheckFileContents(keys, values, expected_locations,
|
||||
expected_unused_bucket, expected_table_size, 3, false, cuckoo_block_size);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
|
||||
@ -225,7 +277,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
|
||||
fname = test::TmpDir() + "/WithCollisionPathFullKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
@ -235,11 +287,50 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_OK(writable_file->Close());
|
||||
|
||||
uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
|
||||
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
|
||||
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
||||
expected_unused_bucket += std::string(values[0].size(), 'a');
|
||||
CheckFileContents(keys, values, expected_locations,
|
||||
expected_unused_bucket, expected_max_buckets, 2, false);
|
||||
expected_unused_bucket, expected_table_size, 2, false);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
|
||||
uint32_t num_hash_fun = 2;
|
||||
std::vector<std::string> user_keys = {"key01", "key02", "key03",
|
||||
"key04", "key05"};
|
||||
std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
|
||||
hash_map = {
|
||||
{user_keys[0], {0, 1}},
|
||||
{user_keys[1], {1, 2}},
|
||||
{user_keys[2], {3, 4}},
|
||||
{user_keys[3], {4, 5}},
|
||||
{user_keys[4], {0, 3}},
|
||||
};
|
||||
std::vector<uint64_t> expected_locations = {2, 1, 3, 4, 0};
|
||||
std::vector<std::string> keys;
|
||||
for (auto& user_key : user_keys) {
|
||||
keys.push_back(GetInternalKey(user_key, false));
|
||||
}
|
||||
|
||||
unique_ptr<WritableFile> writable_file;
|
||||
fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), 2, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(keys[i]), Slice(values[i]));
|
||||
ASSERT_EQ(builder.NumEntries(), i + 1);
|
||||
ASSERT_OK(builder.status());
|
||||
}
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_OK(writable_file->Close());
|
||||
|
||||
uint32_t expected_table_size = NextPowOf2(keys.size() / kHashTableRatio);
|
||||
std::string expected_unused_bucket = GetInternalKey("key00", true);
|
||||
expected_unused_bucket += std::string(values[0].size(), 'a');
|
||||
CheckFileContents(keys, values, expected_locations,
|
||||
expected_unused_bucket, expected_table_size, 2, false, 2);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
|
||||
@ -258,7 +349,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
|
||||
fname = test::TmpDir() + "/NoCollisionUserKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
|
||||
@ -268,11 +359,11 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_OK(writable_file->Close());
|
||||
|
||||
uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
|
||||
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
|
||||
std::string expected_unused_bucket = "key00";
|
||||
expected_unused_bucket += std::string(values[0].size(), 'a');
|
||||
CheckFileContents(user_keys, values, expected_locations,
|
||||
expected_unused_bucket, expected_max_buckets, 2, true);
|
||||
expected_unused_bucket, expected_table_size, 2, true);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
|
||||
@ -291,7 +382,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
|
||||
fname = test::TmpDir() + "/WithCollisionUserKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
|
||||
@ -301,11 +392,11 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_OK(writable_file->Close());
|
||||
|
||||
uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
|
||||
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
|
||||
std::string expected_unused_bucket = "key00";
|
||||
expected_unused_bucket += std::string(values[0].size(), 'a');
|
||||
CheckFileContents(user_keys, values, expected_locations,
|
||||
expected_unused_bucket, expected_max_buckets, 4, true);
|
||||
expected_unused_bucket, expected_table_size, 4, true);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
|
||||
@ -326,7 +417,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
|
||||
fname = test::TmpDir() + "/WithCollisionPathUserKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 2, BytewiseComparator(), GetSliceHash);
|
||||
num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
|
||||
@ -336,11 +427,11 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
|
||||
ASSERT_OK(builder.Finish());
|
||||
ASSERT_OK(writable_file->Close());
|
||||
|
||||
uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
|
||||
uint32_t expected_table_size = NextPowOf2(user_keys.size() / kHashTableRatio);
|
||||
std::string expected_unused_bucket = "key00";
|
||||
expected_unused_bucket += std::string(values[0].size(), 'a');
|
||||
CheckFileContents(user_keys, values, expected_locations,
|
||||
expected_unused_bucket, expected_max_buckets, 2, true);
|
||||
expected_unused_bucket, expected_table_size, 2, true);
|
||||
}
|
||||
|
||||
TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
||||
@ -362,7 +453,7 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
|
||||
fname = test::TmpDir() + "/WithCollisionPathUserKey";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 2, BytewiseComparator(), GetSliceHash);
|
||||
num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t i = 0; i < user_keys.size(); i++) {
|
||||
builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
|
||||
@ -382,7 +473,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
|
||||
fname = test::TmpDir() + "/FailWhenSameKeyInserted";
|
||||
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
|
||||
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
|
||||
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
|
||||
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
|
||||
builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));
|
||||
|
@ -9,34 +9,14 @@
|
||||
#include "db/dbformat.h"
|
||||
#include "table/cuckoo_table_builder.h"
|
||||
#include "table/cuckoo_table_reader.h"
|
||||
#include "util/murmurhash.h"
|
||||
|
||||
namespace rocksdb {
|
||||
extern const uint32_t kMaxNumHashTable = 64;
|
||||
|
||||
extern uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
|
||||
uint64_t max_num_buckets) {
|
||||
static constexpr uint32_t seeds[kMaxNumHashTable] = {
|
||||
816922183, 506425713, 949485004, 22513986, 421427259, 500437285,
|
||||
888981693, 847587269, 511007211, 722295391, 934013645, 566947683,
|
||||
193618736, 428277388, 770956674, 819994962, 755946528, 40807421,
|
||||
263144466, 241420041, 444294464, 731606396, 304158902, 563235655,
|
||||
968740453, 336996831, 462831574, 407970157, 985877240, 637708754,
|
||||
736932700, 205026023, 755371467, 729648411, 807744117, 46482135,
|
||||
847092855, 620960699, 102476362, 314094354, 625838942, 550889395,
|
||||
639071379, 834567510, 397667304, 151945969, 443634243, 196618243,
|
||||
421986347, 407218337, 964502417, 327741231, 493359459, 452453139,
|
||||
692216398, 108161624, 816246924, 234779764, 618949448, 496133787,
|
||||
156374056, 316589799, 982915425, 553105889 };
|
||||
return MurmurHash(s.data(), s.size(), seeds[index]) % max_num_buckets;
|
||||
}
|
||||
|
||||
Status CuckooTableFactory::NewTableReader(const Options& options,
|
||||
const EnvOptions& soptions, const InternalKeyComparator& icomp,
|
||||
std::unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
|
||||
std::unique_ptr<TableReader>* table) const {
|
||||
std::unique_ptr<CuckooTableReader> new_reader(new CuckooTableReader(options,
|
||||
std::move(file), file_size, icomp.user_comparator(), GetSliceMurmurHash));
|
||||
std::move(file), file_size, icomp.user_comparator(), nullptr));
|
||||
Status s = new_reader->status();
|
||||
if (s.ok()) {
|
||||
*table = std::move(new_reader);
|
||||
@ -47,9 +27,8 @@ Status CuckooTableFactory::NewTableReader(const Options& options,
|
||||
TableBuilder* CuckooTableFactory::NewTableBuilder(
|
||||
const Options& options, const InternalKeyComparator& internal_comparator,
|
||||
WritableFile* file, CompressionType compression_type) const {
|
||||
return new CuckooTableBuilder(file, hash_table_ratio_, kMaxNumHashTable,
|
||||
max_search_depth_, internal_comparator.user_comparator(),
|
||||
GetSliceMurmurHash);
|
||||
return new CuckooTableBuilder(file, hash_table_ratio_, 64, max_search_depth_,
|
||||
internal_comparator.user_comparator(), cuckoo_block_size_, nullptr);
|
||||
}
|
||||
|
||||
std::string CuckooTableFactory::GetPrintableTableOptions() const {
|
||||
@ -64,12 +43,16 @@ std::string CuckooTableFactory::GetPrintableTableOptions() const {
|
||||
snprintf(buffer, kBufferSize, " max_search_depth: %u\n",
|
||||
max_search_depth_);
|
||||
ret.append(buffer);
|
||||
snprintf(buffer, kBufferSize, " cuckoo_block_size: %u\n",
|
||||
cuckoo_block_size_);
|
||||
ret.append(buffer);
|
||||
return ret;
|
||||
}
|
||||
|
||||
TableFactory* NewCuckooTableFactory(double hash_table_ratio,
|
||||
uint32_t max_search_depth) {
|
||||
return new CuckooTableFactory(hash_table_ratio, max_search_depth);
|
||||
uint32_t max_search_depth, uint32_t cuckoo_block_size) {
|
||||
return new CuckooTableFactory(
|
||||
hash_table_ratio, max_search_depth, cuckoo_block_size);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -8,11 +8,23 @@
|
||||
|
||||
#include <string>
|
||||
#include "rocksdb/table.h"
|
||||
#include "util/murmurhash.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
extern uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
|
||||
uint64_t max_num_buckets);
|
||||
const uint32_t kCuckooMurmurSeedMultiplier = 816922183;
|
||||
static inline uint64_t CuckooHash(
|
||||
const Slice& user_key, uint32_t hash_cnt, uint64_t table_size_minus_one,
|
||||
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) {
|
||||
#ifndef NDEBUG
|
||||
// This part is used only in unit tests.
|
||||
if (get_slice_hash != nullptr) {
|
||||
return get_slice_hash(user_key, hash_cnt, table_size_minus_one + 1);
|
||||
}
|
||||
#endif
|
||||
return MurmurHash(user_key.data(), user_key.size(),
|
||||
kCuckooMurmurSeedMultiplier * hash_cnt) & table_size_minus_one;
|
||||
}
|
||||
|
||||
// Cuckoo Table is designed for applications that require fast point lookups
|
||||
// but not fast range scans.
|
||||
@ -23,9 +35,11 @@ extern uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
|
||||
// - Does not support Merge operations.
|
||||
class CuckooTableFactory : public TableFactory {
|
||||
public:
|
||||
CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth)
|
||||
CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth,
|
||||
uint32_t cuckoo_block_size)
|
||||
: hash_table_ratio_(hash_table_ratio),
|
||||
max_search_depth_(max_search_depth) {}
|
||||
max_search_depth_(max_search_depth),
|
||||
cuckoo_block_size_(cuckoo_block_size) {}
|
||||
~CuckooTableFactory() {}
|
||||
|
||||
const char* Name() const override { return "CuckooTable"; }
|
||||
@ -41,7 +55,7 @@ class CuckooTableFactory : public TableFactory {
|
||||
CompressionType compression_type) const override;
|
||||
|
||||
// Sanitizes the specified DB Options.
|
||||
Status SanitizeDBOptions(DBOptions* db_opts) const override {
|
||||
Status SanitizeDBOptions(const DBOptions* db_opts) const override {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -50,6 +64,7 @@ class CuckooTableFactory : public TableFactory {
|
||||
private:
|
||||
const double hash_table_ratio_;
|
||||
const uint32_t max_search_depth_;
|
||||
const uint32_t cuckoo_block_size_;
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -17,10 +17,14 @@
|
||||
#include <vector>
|
||||
#include "rocksdb/iterator.h"
|
||||
#include "table/meta_blocks.h"
|
||||
#include "table/cuckoo_table_factory.h"
|
||||
#include "util/arena.h"
|
||||
#include "util/coding.h"
|
||||
|
||||
namespace rocksdb {
|
||||
namespace {
|
||||
static const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1);
|
||||
}
|
||||
|
||||
extern const uint64_t kCuckooTableMagicNumber;
|
||||
|
||||
@ -44,12 +48,12 @@ CuckooTableReader::CuckooTableReader(
|
||||
}
|
||||
table_props_.reset(props);
|
||||
auto& user_props = props->user_collected_properties;
|
||||
auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashTable);
|
||||
auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc);
|
||||
if (hash_funs == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Number of hash functions not found");
|
||||
return;
|
||||
}
|
||||
num_hash_fun_ = *reinterpret_cast<const uint32_t*>(hash_funs->second.data());
|
||||
num_hash_func_ = *reinterpret_cast<const uint32_t*>(hash_funs->second.data());
|
||||
auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey);
|
||||
if (unused_key == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Empty bucket value not found");
|
||||
@ -67,18 +71,29 @@ CuckooTableReader::CuckooTableReader(
|
||||
value_length->second.data());
|
||||
bucket_length_ = key_length_ + value_length_;
|
||||
|
||||
auto num_buckets = user_props.find(CuckooTablePropertyNames::kMaxNumBuckets);
|
||||
if (num_buckets == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Num buckets not found");
|
||||
auto hash_table_size = user_props.find(
|
||||
CuckooTablePropertyNames::kHashTableSize);
|
||||
if (hash_table_size == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Hash table size not found");
|
||||
return;
|
||||
}
|
||||
num_buckets_ = *reinterpret_cast<const uint64_t*>(num_buckets->second.data());
|
||||
table_size_minus_one_ = *reinterpret_cast<const uint64_t*>(
|
||||
hash_table_size->second.data()) - 1;
|
||||
auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel);
|
||||
if (is_last_level == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Is last level not found");
|
||||
return;
|
||||
}
|
||||
is_last_level_ = *reinterpret_cast<const bool*>(is_last_level->second.data());
|
||||
auto cuckoo_block_size = user_props.find(
|
||||
CuckooTablePropertyNames::kCuckooBlockSize);
|
||||
if (cuckoo_block_size == user_props.end()) {
|
||||
status_ = Status::InvalidArgument("Cuckoo block size not found");
|
||||
return;
|
||||
}
|
||||
cuckoo_block_size_ = *reinterpret_cast<const uint32_t*>(
|
||||
cuckoo_block_size->second.data());
|
||||
cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1;
|
||||
status_ = file_->Read(0, file_size, &file_data_, nullptr);
|
||||
}
|
||||
|
||||
@ -89,40 +104,46 @@ Status CuckooTableReader::Get(
|
||||
void (*mark_key_may_exist_handler)(void* handle_context)) {
|
||||
assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0));
|
||||
Slice user_key = ExtractUserKey(key);
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) {
|
||||
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets_);
|
||||
assert(hash_val < num_buckets_);
|
||||
const char* bucket = &file_data_.data()[hash_val * bucket_length_];
|
||||
if (ucomp_->Compare(Slice(unused_key_.data(), user_key.size()),
|
||||
Slice(bucket, user_key.size())) == 0) {
|
||||
return Status::OK();
|
||||
}
|
||||
// Here, we compare only the user key part as we support only one entry
|
||||
// per user key and we don't support sanpshot.
|
||||
if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) {
|
||||
Slice value = Slice(&bucket[key_length_], value_length_);
|
||||
if (is_last_level_) {
|
||||
ParsedInternalKey found_ikey(Slice(bucket, key_length_), 0, kTypeValue);
|
||||
result_handler(handle_context, found_ikey, value);
|
||||
} else {
|
||||
Slice full_key(bucket, key_length_);
|
||||
ParsedInternalKey found_ikey;
|
||||
ParseInternalKey(full_key, &found_ikey);
|
||||
result_handler(handle_context, found_ikey, value);
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
|
||||
uint64_t offset = bucket_length_ * CuckooHash(
|
||||
user_key, hash_cnt, table_size_minus_one_, get_slice_hash_);
|
||||
const char* bucket = &file_data_.data()[offset];
|
||||
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
|
||||
++block_idx, bucket += bucket_length_) {
|
||||
if (ucomp_->Compare(Slice(unused_key_.data(), user_key.size()),
|
||||
Slice(bucket, user_key.size())) == 0) {
|
||||
return Status::OK();
|
||||
}
|
||||
// Here, we compare only the user key part as we support only one entry
|
||||
// per user key and we don't support sanpshot.
|
||||
if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) {
|
||||
Slice value = Slice(&bucket[key_length_], value_length_);
|
||||
if (is_last_level_) {
|
||||
ParsedInternalKey found_ikey(
|
||||
Slice(bucket, key_length_), 0, kTypeValue);
|
||||
result_handler(handle_context, found_ikey, value);
|
||||
} else {
|
||||
Slice full_key(bucket, key_length_);
|
||||
ParsedInternalKey found_ikey;
|
||||
ParseInternalKey(full_key, &found_ikey);
|
||||
result_handler(handle_context, found_ikey, value);
|
||||
}
|
||||
// We don't support merge operations. So, we return here.
|
||||
return Status::OK();
|
||||
}
|
||||
// We don't support merge operations. So, we return here.
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void CuckooTableReader::Prepare(const Slice& key) {
|
||||
// Prefetch the first Cuckoo Block.
|
||||
Slice user_key = ExtractUserKey(key);
|
||||
// Prefetching first location also helps improve Get performance.
|
||||
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) {
|
||||
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets_);
|
||||
PREFETCH(&file_data_.data()[hash_val * bucket_length_], 0, 3);
|
||||
uint64_t addr = reinterpret_cast<uint64_t>(file_data_.data()) +
|
||||
bucket_length_ * CuckooHash(user_key, 0, table_size_minus_one_, nullptr);
|
||||
uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_;
|
||||
for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) {
|
||||
PREFETCH(reinterpret_cast<const char*>(addr), 0, 3);
|
||||
}
|
||||
}
|
||||
|
||||
@ -186,7 +207,9 @@ CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
|
||||
|
||||
void CuckooTableIterator::LoadKeysFromReader() {
|
||||
key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries);
|
||||
for (uint32_t bucket_id = 0; bucket_id < reader_->num_buckets_; bucket_id++) {
|
||||
uint64_t num_buckets = reader_->table_size_minus_one_ +
|
||||
reader_->cuckoo_block_size_;
|
||||
for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) {
|
||||
Slice read_key;
|
||||
status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_,
|
||||
reader_->key_length_, &read_key, nullptr);
|
||||
|
@ -65,12 +65,14 @@ class CuckooTableReader: public TableReader {
|
||||
bool is_last_level_;
|
||||
std::shared_ptr<const TableProperties> table_props_;
|
||||
Status status_;
|
||||
uint32_t num_hash_fun_;
|
||||
uint32_t num_hash_func_;
|
||||
std::string unused_key_;
|
||||
uint32_t key_length_;
|
||||
uint32_t value_length_;
|
||||
uint32_t bucket_length_;
|
||||
uint64_t num_buckets_;
|
||||
uint32_t cuckoo_block_size_;
|
||||
uint32_t cuckoo_block_bytes_minus_one_;
|
||||
uint64_t table_size_minus_one_;
|
||||
const Comparator* ucomp_;
|
||||
uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
|
||||
uint64_t max_num_buckets);
|
||||
|
@ -38,9 +38,6 @@ DEFINE_bool(write, false,
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
extern const uint64_t kCuckooTableMagicNumber;
|
||||
extern const uint64_t kMaxNumHashTable;
|
||||
|
||||
namespace {
|
||||
const uint32_t kNumHashFunc = 10;
|
||||
// Methods, variables related to Hash functions.
|
||||
@ -109,7 +106,7 @@ class CuckooReaderTest {
|
||||
std::unique_ptr<WritableFile> writable_file;
|
||||
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
|
||||
CuckooTableBuilder builder(
|
||||
writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, GetSliceHash);
|
||||
writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, GetSliceHash);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
|
||||
builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
|
||||
@ -397,13 +394,12 @@ void GetKeys(uint64_t num, std::vector<std::string>* keys) {
|
||||
}
|
||||
}
|
||||
|
||||
std::string GetFileName(uint64_t num, double hash_ratio) {
|
||||
std::string GetFileName(uint64_t num) {
|
||||
if (FLAGS_file_dir.empty()) {
|
||||
FLAGS_file_dir = test::TmpDir();
|
||||
}
|
||||
return FLAGS_file_dir + "/cuckoo_read_benchmark" +
|
||||
std::to_string(num/1000000) + "Mratio" +
|
||||
std::to_string(static_cast<int>(100*hash_ratio));
|
||||
std::to_string(num/1000000) + "Mkeys";
|
||||
}
|
||||
|
||||
// Create last level file as we are interested in measuring performance of
|
||||
@ -414,13 +410,13 @@ void WriteFile(const std::vector<std::string>& keys,
|
||||
options.allow_mmap_reads = true;
|
||||
Env* env = options.env;
|
||||
EnvOptions env_options = EnvOptions(options);
|
||||
std::string fname = GetFileName(num, hash_ratio);
|
||||
std::string fname = GetFileName(num);
|
||||
|
||||
std::unique_ptr<WritableFile> writable_file;
|
||||
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
|
||||
CuckooTableBuilder builder(
|
||||
writable_file.get(), hash_ratio,
|
||||
kMaxNumHashTable, 1000, test::Uint64Comparator(), GetSliceMurmurHash);
|
||||
64, 1000, test::Uint64Comparator(), 5, nullptr);
|
||||
ASSERT_OK(builder.status());
|
||||
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
|
||||
// Value is just a part of key.
|
||||
@ -439,27 +435,25 @@ void WriteFile(const std::vector<std::string>& keys,
|
||||
|
||||
CuckooTableReader reader(
|
||||
options, std::move(read_file), file_size,
|
||||
test::Uint64Comparator(), GetSliceMurmurHash);
|
||||
test::Uint64Comparator(), nullptr);
|
||||
ASSERT_OK(reader.status());
|
||||
ReadOptions r_options;
|
||||
for (const auto& key : keys) {
|
||||
for (uint64_t i = 0; i < num; ++i) {
|
||||
int cnt = 0;
|
||||
ASSERT_OK(reader.Get(r_options, Slice(key), &cnt, CheckValue, nullptr));
|
||||
ASSERT_OK(reader.Get(r_options, Slice(keys[i]), &cnt, CheckValue, nullptr));
|
||||
if (cnt != 1) {
|
||||
fprintf(stderr, "%" PRIx64 " not found.\n",
|
||||
*reinterpret_cast<const uint64_t*>(key.data()));
|
||||
fprintf(stderr, "%" PRIu64 " not found.\n", i);
|
||||
ASSERT_EQ(1, cnt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ReadKeys(const std::vector<std::string>& keys, uint64_t num,
|
||||
double hash_ratio, uint32_t batch_size) {
|
||||
void ReadKeys(uint64_t num, uint32_t batch_size) {
|
||||
Options options;
|
||||
options.allow_mmap_reads = true;
|
||||
Env* env = options.env;
|
||||
EnvOptions env_options = EnvOptions(options);
|
||||
std::string fname = GetFileName(num, hash_ratio);
|
||||
std::string fname = GetFileName(num);
|
||||
|
||||
uint64_t file_size;
|
||||
env->GetFileSize(fname, &file_size);
|
||||
@ -468,29 +462,33 @@ void ReadKeys(const std::vector<std::string>& keys, uint64_t num,
|
||||
|
||||
CuckooTableReader reader(
|
||||
options, std::move(read_file), file_size, test::Uint64Comparator(),
|
||||
GetSliceMurmurHash);
|
||||
nullptr);
|
||||
ASSERT_OK(reader.status());
|
||||
const UserCollectedProperties user_props =
|
||||
reader.GetTableProperties()->user_collected_properties;
|
||||
const uint32_t num_hash_fun = *reinterpret_cast<const uint32_t*>(
|
||||
user_props.at(CuckooTablePropertyNames::kNumHashTable).data());
|
||||
fprintf(stderr, "With %" PRIu64 " items and hash table ratio %f, number of"
|
||||
" hash functions used: %u.\n", num, hash_ratio, num_hash_fun);
|
||||
user_props.at(CuckooTablePropertyNames::kNumHashFunc).data());
|
||||
const uint64_t table_size = *reinterpret_cast<const uint64_t*>(
|
||||
user_props.at(CuckooTablePropertyNames::kHashTableSize).data());
|
||||
fprintf(stderr, "With %" PRIu64 " items, utilization is %.2f%%, number of"
|
||||
" hash functions: %u.\n", num, num * 100.0 / (table_size), num_hash_fun);
|
||||
ReadOptions r_options;
|
||||
|
||||
uint64_t start_time = env->NowMicros();
|
||||
if (batch_size > 0) {
|
||||
for (uint64_t i = 0; i < num; i += batch_size) {
|
||||
for (uint64_t j = i; j < i+batch_size && j < num; ++j) {
|
||||
reader.Prepare(Slice(keys[j]));
|
||||
reader.Prepare(Slice(reinterpret_cast<char*>(&j), 16));
|
||||
}
|
||||
for (uint64_t j = i; j < i+batch_size && j < num; ++j) {
|
||||
reader.Get(r_options, Slice(keys[j]), nullptr, DoNothing, nullptr);
|
||||
reader.Get(r_options, Slice(reinterpret_cast<char*>(&j), 16),
|
||||
nullptr, DoNothing, nullptr);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (uint64_t i = 0; i < num; i++) {
|
||||
reader.Get(r_options, Slice(keys[i]), nullptr, DoNothing, nullptr);
|
||||
reader.Get(r_options, Slice(reinterpret_cast<char*>(&i), 16), nullptr,
|
||||
DoNothing, nullptr);
|
||||
}
|
||||
}
|
||||
float time_per_op = (env->NowMicros() - start_time) * 1.0 / num;
|
||||
@ -501,26 +499,30 @@ void ReadKeys(const std::vector<std::string>& keys, uint64_t num,
|
||||
} // namespace.
|
||||
|
||||
TEST(CuckooReaderTest, TestReadPerformance) {
|
||||
uint64_t num = 1000*1000*100;
|
||||
if (!FLAGS_enable_perf) {
|
||||
return;
|
||||
}
|
||||
double hash_ratio = 0.95;
|
||||
// These numbers are chosen to have a hash utilizaiton % close to
|
||||
// 0.9, 0.75, 0.6 and 0.5 respectively.
|
||||
// They all create 128 M buckets.
|
||||
std::vector<uint64_t> nums = {120*1000*1000, 100*1000*1000, 80*1000*1000,
|
||||
70*1000*1000};
|
||||
#ifndef NDEBUG
|
||||
fprintf(stdout,
|
||||
"WARNING: Not compiled with DNDEBUG. Performance tests may be slow.\n");
|
||||
#endif
|
||||
std::vector<std::string> keys;
|
||||
GetKeys(num, &keys);
|
||||
for (double hash_ratio : std::vector<double>({0.5, 0.6, 0.75, 0.9})) {
|
||||
if (FLAGS_write || !Env::Default()->FileExists(
|
||||
GetFileName(num, hash_ratio))) {
|
||||
GetKeys(*std::max_element(nums.begin(), nums.end()), &keys);
|
||||
for (uint64_t num : nums) {
|
||||
if (FLAGS_write || !Env::Default()->FileExists(GetFileName(num))) {
|
||||
WriteFile(keys, num, hash_ratio);
|
||||
}
|
||||
ReadKeys(keys, num, hash_ratio, 0);
|
||||
ReadKeys(keys, num, hash_ratio, 10);
|
||||
ReadKeys(keys, num, hash_ratio, 25);
|
||||
ReadKeys(keys, num, hash_ratio, 50);
|
||||
ReadKeys(keys, num, hash_ratio, 100);
|
||||
ReadKeys(num, 0);
|
||||
ReadKeys(num, 10);
|
||||
ReadKeys(num, 25);
|
||||
ReadKeys(num, 50);
|
||||
ReadKeys(num, 100);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
@ -71,20 +71,14 @@ void FilterBlockBuilder::AddKey(const Slice& key) {
|
||||
}
|
||||
|
||||
// add prefix to filter if needed
|
||||
if (prefix_extractor_ && prefix_extractor_->InDomain(ExtractUserKey(key))) {
|
||||
// If prefix_extractor_, this filter_block layer assumes we only
|
||||
// operate on internal keys.
|
||||
Slice user_key = ExtractUserKey(key);
|
||||
if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
|
||||
// this assumes prefix(prefix(key)) == prefix(key), as the last
|
||||
// entry in entries_ may be either a key or prefix, and we use
|
||||
// prefix(last entry) to get the prefix of the last key.
|
||||
if (prev.size() == 0 ||
|
||||
!SamePrefix(user_key, ExtractUserKey(prev))) {
|
||||
Slice prefix = prefix_extractor_->Transform(user_key);
|
||||
InternalKey internal_prefix_tmp(prefix, 0, kTypeValue);
|
||||
Slice internal_prefix = internal_prefix_tmp.Encode();
|
||||
if (prev.size() == 0 || !SamePrefix(key, prev)) {
|
||||
Slice prefix = prefix_extractor_->Transform(key);
|
||||
start_.push_back(entries_.size());
|
||||
entries_.append(internal_prefix.data(), internal_prefix.size());
|
||||
entries_.append(prefix.data(), prefix.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -211,10 +211,13 @@ Status ReadBlock(RandomAccessFile* file, const Footer& footer,
|
||||
const ReadOptions& options, const BlockHandle& handle,
|
||||
Slice* contents, /* result of reading */ char* buf) {
|
||||
size_t n = static_cast<size_t>(handle.size());
|
||||
Status s;
|
||||
|
||||
{
|
||||
PERF_TIMER_GUARD(block_read_time);
|
||||
s = file->Read(handle.offset(), n + kBlockTrailerSize, contents, buf);
|
||||
}
|
||||
|
||||
PERF_TIMER_AUTO(block_read_time);
|
||||
Status s = file->Read(handle.offset(), n + kBlockTrailerSize, contents, buf);
|
||||
PERF_TIMER_MEASURE(block_read_time);
|
||||
PERF_COUNTER_ADD(block_read_count, 1);
|
||||
PERF_COUNTER_ADD(block_read_byte, n + kBlockTrailerSize);
|
||||
|
||||
@ -228,6 +231,7 @@ Status ReadBlock(RandomAccessFile* file, const Footer& footer,
|
||||
// Check the crc of the type and the block contents
|
||||
const char* data = contents->data(); // Pointer to where Read put the data
|
||||
if (options.verify_checksums) {
|
||||
PERF_TIMER_GUARD(block_checksum_time);
|
||||
uint32_t value = DecodeFixed32(data + n + 1);
|
||||
uint32_t actual = 0;
|
||||
switch (footer.checksum()) {
|
||||
@ -247,7 +251,6 @@ Status ReadBlock(RandomAccessFile* file, const Footer& footer,
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
PERF_TIMER_STOP(block_checksum_time);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
@ -265,7 +268,7 @@ Status DecompressBlock(BlockContents* result, size_t block_size,
|
||||
result->cachable = false;
|
||||
result->heap_allocated = false;
|
||||
|
||||
PERF_TIMER_AUTO(block_decompress_time);
|
||||
PERF_TIMER_GUARD(block_decompress_time);
|
||||
rocksdb::CompressionType compression_type =
|
||||
static_cast<rocksdb::CompressionType>(data[n]);
|
||||
// If the caller has requested that the block not be uncompressed
|
||||
@ -295,7 +298,6 @@ Status DecompressBlock(BlockContents* result, size_t block_size,
|
||||
} else {
|
||||
s = UncompressBlockContents(data, n, result);
|
||||
}
|
||||
PERF_TIMER_STOP(block_decompress_time);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -116,12 +116,12 @@ class MergingIterator : public Iterator {
|
||||
// Invalidate the heap.
|
||||
use_heap_ = false;
|
||||
IteratorWrapper* first_child = nullptr;
|
||||
PERF_TIMER_DECLARE();
|
||||
|
||||
for (auto& child : children_) {
|
||||
PERF_TIMER_START(seek_child_seek_time);
|
||||
child.Seek(target);
|
||||
PERF_TIMER_STOP(seek_child_seek_time);
|
||||
{
|
||||
PERF_TIMER_GUARD(seek_child_seek_time);
|
||||
child.Seek(target);
|
||||
}
|
||||
PERF_COUNTER_ADD(seek_child_seek_count, 1);
|
||||
|
||||
if (child.Valid()) {
|
||||
@ -134,24 +134,21 @@ class MergingIterator : public Iterator {
|
||||
} else {
|
||||
// We have more than one children with valid keys. Initialize
|
||||
// the heap and put the first child into the heap.
|
||||
PERF_TIMER_START(seek_min_heap_time);
|
||||
PERF_TIMER_GUARD(seek_min_heap_time);
|
||||
ClearHeaps();
|
||||
minHeap_.push(first_child);
|
||||
PERF_TIMER_STOP(seek_min_heap_time);
|
||||
}
|
||||
}
|
||||
if (use_heap_) {
|
||||
PERF_TIMER_START(seek_min_heap_time);
|
||||
PERF_TIMER_GUARD(seek_min_heap_time);
|
||||
minHeap_.push(&child);
|
||||
PERF_TIMER_STOP(seek_min_heap_time);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (use_heap_) {
|
||||
// If heap is valid, need to put the smallest key to curent_.
|
||||
PERF_TIMER_START(seek_min_heap_time);
|
||||
PERF_TIMER_GUARD(seek_min_heap_time);
|
||||
FindSmallest();
|
||||
PERF_TIMER_STOP(seek_min_heap_time);
|
||||
} else {
|
||||
// The heap is not valid, then the current_ iterator is the first
|
||||
// one, or null if there is no first child.
|
||||
|
@ -16,9 +16,7 @@
|
||||
namespace rocksdb {
|
||||
|
||||
MetaIndexBuilder::MetaIndexBuilder()
|
||||
: meta_index_block_(
|
||||
new BlockBuilder(1 /* restart interval */, BytewiseComparator())) {
|
||||
}
|
||||
: meta_index_block_(new BlockBuilder(1 /* restart interval */)) {}
|
||||
|
||||
void MetaIndexBuilder::Add(const std::string& key,
|
||||
const BlockHandle& handle) {
|
||||
@ -35,9 +33,7 @@ Slice MetaIndexBuilder::Finish() {
|
||||
}
|
||||
|
||||
PropertyBlockBuilder::PropertyBlockBuilder()
|
||||
: properties_block_(
|
||||
new BlockBuilder(1 /* restart interval */, BytewiseComparator())) {
|
||||
}
|
||||
: properties_block_(new BlockBuilder(1 /* restart interval */)) {}
|
||||
|
||||
void PropertyBlockBuilder::Add(const std::string& name,
|
||||
const std::string& val) {
|
||||
|
@ -169,7 +169,7 @@ class PlainTableFactory : public TableFactory {
|
||||
static const char kValueTypeSeqId0 = 0xFF;
|
||||
|
||||
// Sanitizes the specified DB Options.
|
||||
Status SanitizeDBOptions(DBOptions* db_opts) const override {
|
||||
Status SanitizeDBOptions(const DBOptions* db_opts) const override {
|
||||
if (db_opts->allow_mmap_reads == false) {
|
||||
return Status::NotSupported(
|
||||
"PlainTable with allow_mmap_reads == false is not supported.");
|
||||
|
@ -244,8 +244,7 @@ class BlockConstructor: public Constructor {
|
||||
const KVMap& data) {
|
||||
delete block_;
|
||||
block_ = nullptr;
|
||||
BlockBuilder builder(table_options.block_restart_interval,
|
||||
&internal_comparator);
|
||||
BlockBuilder builder(table_options.block_restart_interval);
|
||||
|
||||
for (KVMap::const_iterator it = data.begin();
|
||||
it != data.end();
|
||||
@ -1054,7 +1053,7 @@ TEST(BlockBasedTableTest, BasicBlockBasedTableProperties) {
|
||||
ASSERT_EQ("", props.filter_policy_name); // no filter policy is used
|
||||
|
||||
// Verify data size.
|
||||
BlockBuilder block_builder(1, options.comparator);
|
||||
BlockBuilder block_builder(1);
|
||||
for (const auto& item : kvmap) {
|
||||
block_builder.Add(item.first, item.second);
|
||||
}
|
||||
|
@ -172,8 +172,9 @@ void TwoLevelIterator::InitDataBlock() {
|
||||
SetSecondLevelIterator(nullptr);
|
||||
} else {
|
||||
Slice handle = first_level_iter_.value();
|
||||
if (second_level_iter_.iter() != nullptr
|
||||
&& handle.compare(data_block_handle_) == 0) {
|
||||
if (second_level_iter_.iter() != nullptr &&
|
||||
!second_level_iter_.status().IsIncomplete() &&
|
||||
handle.compare(data_block_handle_) == 0) {
|
||||
// second_level_iter is already constructed with this iterator, so
|
||||
// no need to change anything
|
||||
} else {
|
||||
|
@ -31,6 +31,7 @@ int main() {
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <exception>
|
||||
#include <gflags/gflags.h>
|
||||
#include "db/db_impl.h"
|
||||
#include "db/version_set.h"
|
||||
@ -759,7 +760,7 @@ class StressTest {
|
||||
? NewBloomFilterPolicy(FLAGS_bloom_bits)
|
||||
: nullptr),
|
||||
db_(nullptr),
|
||||
new_column_family_name_(0),
|
||||
new_column_family_name_(1),
|
||||
num_times_reopened_(0) {
|
||||
if (FLAGS_destroy_db_initially) {
|
||||
std::vector<std::string> files;
|
||||
@ -1217,12 +1218,20 @@ class StressTest {
|
||||
Status s __attribute__((unused));
|
||||
s = db_->DropColumnFamily(column_families_[cf]);
|
||||
delete column_families_[cf];
|
||||
assert(s.ok());
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "dropping column family error: %s\n",
|
||||
s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
s = db_->CreateColumnFamily(ColumnFamilyOptions(options_), new_name,
|
||||
&column_families_[cf]);
|
||||
column_family_names_[cf] = new_name;
|
||||
thread->shared->ClearColumnFamily(cf);
|
||||
assert(s.ok());
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "creating column family error: %s\n",
|
||||
s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
thread->shared->UnlockColumnFamily(cf);
|
||||
}
|
||||
}
|
||||
@ -1297,10 +1306,15 @@ class StressTest {
|
||||
}
|
||||
}
|
||||
thread->shared->Put(rand_column_family, rand_key, value_base);
|
||||
Status s;
|
||||
if (FLAGS_use_merge) {
|
||||
db_->Merge(write_opts, column_family, key, v);
|
||||
s = db_->Merge(write_opts, column_family, key, v);
|
||||
} else {
|
||||
db_->Put(write_opts, column_family, key, v);
|
||||
s = db_->Put(write_opts, column_family, key, v);
|
||||
}
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
thread->stats.AddBytesForWrites(1, sz);
|
||||
} else {
|
||||
@ -1311,8 +1325,12 @@ class StressTest {
|
||||
// OPERATION delete
|
||||
if (!FLAGS_test_batches_snapshots) {
|
||||
thread->shared->Delete(rand_column_family, rand_key);
|
||||
db_->Delete(write_opts, column_family, key);
|
||||
Status s = db_->Delete(write_opts, column_family, key);
|
||||
thread->stats.AddDeletes(1);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "delete error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
} else {
|
||||
MultiDelete(thread, write_opts, column_family, key);
|
||||
}
|
||||
|
@ -239,11 +239,23 @@ class PosixRandomAccessFile: public RandomAccessFile {
|
||||
char* scratch) const {
|
||||
Status s;
|
||||
ssize_t r = -1;
|
||||
do {
|
||||
r = pread(fd_, scratch, n, static_cast<off_t>(offset));
|
||||
} while (r < 0 && errno == EINTR);
|
||||
IOSTATS_ADD_IF_POSITIVE(bytes_read, r);
|
||||
*result = Slice(scratch, (r < 0) ? 0 : r);
|
||||
size_t left = n;
|
||||
char* ptr = scratch;
|
||||
while (left > 0) {
|
||||
r = pread(fd_, ptr, left, static_cast<off_t>(offset));
|
||||
if (r <= 0) {
|
||||
if (errno == EINTR) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr += r;
|
||||
offset += r;
|
||||
left -= r;
|
||||
}
|
||||
|
||||
IOSTATS_ADD_IF_POSITIVE(bytes_read, n - left);
|
||||
*result = Slice(scratch, (r < 0) ? 0 : n - left);
|
||||
if (r < 0) {
|
||||
// An error: return a non-ok status
|
||||
s = IOError(filename_, errno);
|
||||
@ -907,9 +919,23 @@ class PosixRandomRWFile : public RandomRWFile {
|
||||
virtual Status Read(uint64_t offset, size_t n, Slice* result,
|
||||
char* scratch) const {
|
||||
Status s;
|
||||
ssize_t r = pread(fd_, scratch, n, static_cast<off_t>(offset));
|
||||
IOSTATS_ADD_IF_POSITIVE(bytes_read, r);
|
||||
*result = Slice(scratch, (r < 0) ? 0 : r);
|
||||
ssize_t r = -1;
|
||||
size_t left = n;
|
||||
char* ptr = scratch;
|
||||
while (left > 0) {
|
||||
r = pread(fd_, ptr, left, static_cast<off_t>(offset));
|
||||
if (r <= 0) {
|
||||
if (errno == EINTR) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr += r;
|
||||
offset += r;
|
||||
left -= r;
|
||||
}
|
||||
IOSTATS_ADD_IF_POSITIVE(bytes_read, n - left);
|
||||
*result = Slice(scratch, (r < 0) ? 0 : n - left);
|
||||
if (r < 0) {
|
||||
s = IOError(filename_, errno);
|
||||
}
|
||||
@ -1018,15 +1044,12 @@ class PosixFileLock : public FileLock {
|
||||
std::string filename;
|
||||
};
|
||||
|
||||
|
||||
namespace {
|
||||
void PthreadCall(const char* label, int result) {
|
||||
if (result != 0) {
|
||||
fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class PosixEnv : public Env {
|
||||
public:
|
||||
@ -1724,12 +1747,11 @@ unsigned int PosixEnv::GetThreadPoolQueueLen(Priority pri) const {
|
||||
return thread_pools_[pri].GetQueueLen();
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct StartThreadState {
|
||||
void (*user_function)(void*);
|
||||
void* arg;
|
||||
};
|
||||
}
|
||||
|
||||
static void* StartThreadWrapper(void* arg) {
|
||||
StartThreadState* state = reinterpret_cast<StartThreadState*>(arg);
|
||||
state->user_function(state->arg);
|
||||
|
@ -9,7 +9,9 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
#ifndef IOS_CROSS_COMPILE
|
||||
__thread IOStatsContext iostats_context;
|
||||
#endif // IOS_CROSS_COMPILE
|
||||
|
||||
void IOStatsContext::Reset() {
|
||||
thread_pool_id = Env::Priority::TOTAL;
|
||||
|
@ -6,6 +6,8 @@
|
||||
#pragma once
|
||||
#include "rocksdb/iostats_context.h"
|
||||
|
||||
#ifndef IOS_CROSS_COMPILE
|
||||
|
||||
// increment a specific counter by the specified value
|
||||
#define IOSTATS_ADD(metric, value) \
|
||||
(iostats_context.metric += value)
|
||||
@ -30,3 +32,15 @@
|
||||
|
||||
#define IOSTATS(metric) \
|
||||
(iostats_context.metric)
|
||||
|
||||
#else // IOS_CROSS_COMPILE
|
||||
|
||||
#define IOSTATS_ADD(metric, value)
|
||||
#define IOSTATS_ADD_IF_POSITIVE(metric, value)
|
||||
#define IOSTATS_RESET(metric)
|
||||
#define IOSTATS_RESET_ALL()
|
||||
#define IOSTATS_SET_THREAD_POOL_ID(value)
|
||||
#define IOSTATS_THREAD_POOL_ID()
|
||||
#define IOSTATS(metric) 0
|
||||
|
||||
#endif // IOS_CROSS_COMPILE
|
||||
|
@ -11,11 +11,10 @@ namespace rocksdb {
|
||||
|
||||
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)
|
||||
|
||||
#define PERF_TIMER_DECLARE()
|
||||
#define PERF_TIMER_START(metric)
|
||||
#define PERF_TIMER_AUTO(metric)
|
||||
#define PERF_TIMER_GUARD(metric)
|
||||
#define PERF_TIMER_MEASURE(metric)
|
||||
#define PERF_TIMER_STOP(metric)
|
||||
#define PERF_TIMER_START(metric)
|
||||
#define PERF_COUNTER_ADD(metric, value)
|
||||
|
||||
#else
|
||||
@ -24,10 +23,15 @@ extern __thread PerfLevel perf_level;
|
||||
|
||||
class PerfStepTimer {
|
||||
public:
|
||||
PerfStepTimer()
|
||||
PerfStepTimer(uint64_t* metric)
|
||||
: enabled_(perf_level >= PerfLevel::kEnableTime),
|
||||
env_(enabled_ ? Env::Default() : nullptr),
|
||||
start_(0) {
|
||||
start_(0),
|
||||
metric_(metric) {
|
||||
}
|
||||
|
||||
~PerfStepTimer() {
|
||||
Stop();
|
||||
}
|
||||
|
||||
void Start() {
|
||||
@ -36,17 +40,17 @@ class PerfStepTimer {
|
||||
}
|
||||
}
|
||||
|
||||
void Measure(uint64_t* metric) {
|
||||
void Measure() {
|
||||
if (start_) {
|
||||
uint64_t now = env_->NowNanos();
|
||||
*metric += now - start_;
|
||||
*metric_ += now - start_;
|
||||
start_ = now;
|
||||
}
|
||||
}
|
||||
|
||||
void Stop(uint64_t* metric) {
|
||||
void Stop() {
|
||||
if (start_) {
|
||||
*metric += env_->NowNanos() - start_;
|
||||
*metric_ += env_->NowNanos() - start_;
|
||||
start_ = 0;
|
||||
}
|
||||
}
|
||||
@ -55,29 +59,25 @@ class PerfStepTimer {
|
||||
const bool enabled_;
|
||||
Env* const env_;
|
||||
uint64_t start_;
|
||||
uint64_t* metric_;
|
||||
};
|
||||
|
||||
// Declare the local timer object to be used later on
|
||||
#define PERF_TIMER_DECLARE() \
|
||||
PerfStepTimer perf_step_timer;
|
||||
// Stop the timer and update the metric
|
||||
#define PERF_TIMER_STOP(metric) \
|
||||
perf_step_timer_ ## metric.Stop();
|
||||
|
||||
// Set start time of the timer
|
||||
#define PERF_TIMER_START(metric) \
|
||||
perf_step_timer.Start();
|
||||
perf_step_timer_ ## metric.Start();
|
||||
|
||||
// Declare and set start time of the timer
|
||||
#define PERF_TIMER_AUTO(metric) \
|
||||
PerfStepTimer perf_step_timer; \
|
||||
perf_step_timer.Start();
|
||||
#define PERF_TIMER_GUARD(metric) \
|
||||
PerfStepTimer perf_step_timer_ ## metric(&(perf_context.metric)); \
|
||||
perf_step_timer_ ## metric.Start();
|
||||
|
||||
// Update metric with time elapsed since last START. start time is reset
|
||||
// to current timestamp.
|
||||
#define PERF_TIMER_MEASURE(metric) \
|
||||
perf_step_timer.Measure(&(perf_context.metric));
|
||||
|
||||
// Update metric with time elapsed since last START. But start time is not set.
|
||||
#define PERF_TIMER_STOP(metric) \
|
||||
perf_step_timer.Stop(&(perf_context.metric));
|
||||
perf_step_timer_ ## metric.Measure();
|
||||
|
||||
// Increase metric value
|
||||
#define PERF_COUNTER_ADD(metric, value) \
|
||||
|
@ -621,10 +621,13 @@ class SpatialDBImpl : public SpatialDB {
|
||||
namespace {
|
||||
DBOptions GetDBOptions(const SpatialDBOptions& options) {
|
||||
DBOptions db_options;
|
||||
db_options.max_background_compactions = options.num_threads / 2;
|
||||
db_options.max_background_flushes = options.num_threads / 2;
|
||||
db_options.env->SetBackgroundThreads(db_options.max_background_compactions, Env::LOW);
|
||||
db_options.env->SetBackgroundThreads(db_options.max_background_flushes, Env::HIGH);
|
||||
db_options.max_background_compactions = 3 * options.num_threads / 4;
|
||||
db_options.max_background_flushes =
|
||||
options.num_threads - db_options.max_background_compactions;
|
||||
db_options.env->SetBackgroundThreads(db_options.max_background_compactions,
|
||||
Env::LOW);
|
||||
db_options.env->SetBackgroundThreads(db_options.max_background_flushes,
|
||||
Env::HIGH);
|
||||
if (options.bulk_load) {
|
||||
db_options.disableDataSync = true;
|
||||
}
|
||||
@ -634,14 +637,16 @@ DBOptions GetDBOptions(const SpatialDBOptions& options) {
|
||||
ColumnFamilyOptions GetColumnFamilyOptions(const SpatialDBOptions& options,
|
||||
std::shared_ptr<Cache> block_cache) {
|
||||
ColumnFamilyOptions column_family_options;
|
||||
column_family_options.write_buffer_size = 128 * 1024 * 1024; // 128MB
|
||||
column_family_options.max_bytes_for_level_base = 1024 * 1024 * 1024; // 1 GB
|
||||
column_family_options.write_buffer_size = 128 * 1024 * 1024; // 128MB
|
||||
column_family_options.max_write_buffer_number = 4;
|
||||
// only compress levels >= 1
|
||||
column_family_options.level0_file_num_compaction_trigger = 2;
|
||||
column_family_options.level0_slowdown_writes_trigger = 16;
|
||||
column_family_options.level0_slowdown_writes_trigger = 32;
|
||||
// only compress levels >= 2
|
||||
column_family_options.compression_per_level.resize(
|
||||
column_family_options.num_levels);
|
||||
for (int i = 0; i < column_family_options.num_levels; ++i) {
|
||||
if (i == 0) {
|
||||
if (i < 2) {
|
||||
column_family_options.compression_per_level[i] = kNoCompression;
|
||||
} else {
|
||||
column_family_options.compression_per_level[i] = kLZ4Compression;
|
||||
@ -651,17 +656,6 @@ ColumnFamilyOptions GetColumnFamilyOptions(const SpatialDBOptions& options,
|
||||
table_options.block_cache = block_cache;
|
||||
column_family_options.table_factory.reset(
|
||||
NewBlockBasedTableFactory(table_options));
|
||||
if (options.bulk_load) {
|
||||
column_family_options.level0_file_num_compaction_trigger = (1 << 30);
|
||||
column_family_options.level0_slowdown_writes_trigger = (1 << 30);
|
||||
column_family_options.level0_stop_writes_trigger = (1 << 30);
|
||||
column_family_options.disable_auto_compactions = true;
|
||||
column_family_options.source_compaction_factor = (1 << 30);
|
||||
column_family_options.num_levels = 2;
|
||||
column_family_options.target_file_size_base = 256 * 1024 * 1024;
|
||||
column_family_options.max_mem_compaction_level = 0;
|
||||
column_family_options.memtable_factory.reset(new VectorRepFactory());
|
||||
}
|
||||
return column_family_options;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user