Make rocksdb-deletes faster using bloom filter
Summary: Wrote a new function in db_impl.c, KeyMayExist, that calls GetImpl (the implementation behind Get) with a new parameter, no_IO, turned on, so that the key is reported as not found only if bloom filters can guarantee that the key is not in the database. Delete calls this function, and if the option deletes_check_filter_first is turned on and KeyMayExist returns false, the delete is dropped, saving: 1. the Put of a delete-type record, 2. space in the db, and 3. compaction time. Test Plan: make all check; will run db_stress and db_bench and enhance the unit test once the basic design gets approved. Reviewers: dhruba, haobo, vamsi. Reviewed By: haobo. CC: leveldb. Differential Revision: https://reviews.facebook.net/D11607
This commit is contained in:
parent
8a5341ec7d
commit
2a986919d6
@ -327,6 +327,9 @@ static auto FLAGS_use_adaptive_mutex =
|
|||||||
static auto FLAGS_bytes_per_sync =
|
static auto FLAGS_bytes_per_sync =
|
||||||
leveldb::Options().bytes_per_sync;
|
leveldb::Options().bytes_per_sync;
|
||||||
|
|
||||||
|
// On true, deletes use bloom-filter and drop the delete if key not present
|
||||||
|
static bool FLAGS_deletes_check_filter_first = false;
|
||||||
|
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
|
|
||||||
// Helper for quickly generating random data.
|
// Helper for quickly generating random data.
|
||||||
@ -1111,6 +1114,7 @@ unique_ptr<char []> GenerateKeyFromInt(int v, const char* suffix = "")
|
|||||||
options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base;
|
options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base;
|
||||||
options.max_bytes_for_level_multiplier =
|
options.max_bytes_for_level_multiplier =
|
||||||
FLAGS_max_bytes_for_level_multiplier;
|
FLAGS_max_bytes_for_level_multiplier;
|
||||||
|
options.deletes_check_filter_first = FLAGS_deletes_check_filter_first;
|
||||||
if (FLAGS_max_bytes_for_level_multiplier_additional.size() > 0) {
|
if (FLAGS_max_bytes_for_level_multiplier_additional.size() > 0) {
|
||||||
if (FLAGS_max_bytes_for_level_multiplier_additional.size() !=
|
if (FLAGS_max_bytes_for_level_multiplier_additional.size() !=
|
||||||
(unsigned int)FLAGS_num_levels) {
|
(unsigned int)FLAGS_num_levels) {
|
||||||
@ -2216,6 +2220,9 @@ int main(int argc, char** argv) {
|
|||||||
FLAGS_keys_per_multiget = n;
|
FLAGS_keys_per_multiget = n;
|
||||||
} else if (sscanf(argv[i], "--bytes_per_sync=%ld%c", &l, &junk) == 1) {
|
} else if (sscanf(argv[i], "--bytes_per_sync=%ld%c", &l, &junk) == 1) {
|
||||||
FLAGS_bytes_per_sync = l;
|
FLAGS_bytes_per_sync = l;
|
||||||
|
} else if (sscanf(argv[i], "--deletes_check_filter_first=%d%c", &n, &junk)
|
||||||
|
== 1 && (n == 0 || n ==1 )) {
|
||||||
|
FLAGS_deletes_check_filter_first = n;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
||||||
exit(1);
|
exit(1);
|
||||||
|
@ -1998,6 +1998,16 @@ int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
|
|||||||
Status DBImpl::Get(const ReadOptions& options,
|
Status DBImpl::Get(const ReadOptions& options,
|
||||||
const Slice& key,
|
const Slice& key,
|
||||||
std::string* value) {
|
std::string* value) {
|
||||||
|
return GetImpl(options, key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no_IO is true, then returns Status::NotFound if key is not in memtable,
|
||||||
|
// immutable-memtable and bloom-filters can guarantee that key is not in db,
|
||||||
|
// "value" is garbage string if no_IO is true
|
||||||
|
Status DBImpl::GetImpl(const ReadOptions& options,
|
||||||
|
const Slice& key,
|
||||||
|
std::string* value,
|
||||||
|
const bool no_IO) {
|
||||||
Status s;
|
Status s;
|
||||||
|
|
||||||
StopWatch sw(env_, options_.statistics, DB_GET);
|
StopWatch sw(env_, options_.statistics, DB_GET);
|
||||||
@ -2026,12 +2036,12 @@ Status DBImpl::Get(const ReadOptions& options,
|
|||||||
// s is both in/out. When in, s could either be OK or MergeInProgress.
|
// s is both in/out. When in, s could either be OK or MergeInProgress.
|
||||||
// value will contain the current merge operand in the latter case.
|
// value will contain the current merge operand in the latter case.
|
||||||
LookupKey lkey(key, snapshot);
|
LookupKey lkey(key, snapshot);
|
||||||
if (mem->Get(lkey, value, &s, options_)) {
|
if (mem->Get(lkey, value, &s, options_, no_IO)) {
|
||||||
// Done
|
// Done
|
||||||
} else if (imm.Get(lkey, value, &s, options_)) {
|
} else if (imm.Get(lkey, value, &s, options_, no_IO)) {
|
||||||
// Done
|
// Done
|
||||||
} else {
|
} else {
|
||||||
current->Get(options, lkey, value, &s, &stats, options_);
|
current->Get(options, lkey, value, &s, &stats, options_, no_IO);
|
||||||
have_stat_update = true;
|
have_stat_update = true;
|
||||||
}
|
}
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
@ -2121,6 +2131,12 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
|
|||||||
return statList;
|
return statList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DBImpl::KeyMayExist(const Slice& key) {
|
||||||
|
std::string value;
|
||||||
|
const Status s = GetImpl(ReadOptions(), key, &value, true);
|
||||||
|
return !s.IsNotFound();
|
||||||
|
}
|
||||||
|
|
||||||
Iterator* DBImpl::NewIterator(const ReadOptions& options) {
|
Iterator* DBImpl::NewIterator(const ReadOptions& options) {
|
||||||
SequenceNumber latest_snapshot;
|
SequenceNumber latest_snapshot;
|
||||||
Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
|
Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
|
||||||
@ -2156,6 +2172,10 @@ Status DBImpl::Merge(const WriteOptions& o, const Slice& key,
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
||||||
|
if (options_.deletes_check_filter_first && !KeyMayExist(key)) {
|
||||||
|
RecordTick(options_.statistics, NUMBER_FILTERED_DELETES);
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
return DB::Delete(options, key);
|
return DB::Delete(options, key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
10
db/db_impl.h
10
db/db_impl.h
@ -48,6 +48,10 @@ class DBImpl : public DB {
|
|||||||
virtual std::vector<Status> MultiGet(const ReadOptions& options,
|
virtual std::vector<Status> MultiGet(const ReadOptions& options,
|
||||||
const std::vector<Slice>& keys,
|
const std::vector<Slice>& keys,
|
||||||
std::vector<std::string>* values);
|
std::vector<std::string>* values);
|
||||||
|
|
||||||
|
// Returns false if key can't exist- based on memtable, immutable-memtable and
|
||||||
|
// bloom-filters; true otherwise. No IO is performed
|
||||||
|
virtual bool KeyMayExist(const Slice& key);
|
||||||
virtual Iterator* NewIterator(const ReadOptions&);
|
virtual Iterator* NewIterator(const ReadOptions&);
|
||||||
virtual const Snapshot* GetSnapshot();
|
virtual const Snapshot* GetSnapshot();
|
||||||
virtual void ReleaseSnapshot(const Snapshot* snapshot);
|
virtual void ReleaseSnapshot(const Snapshot* snapshot);
|
||||||
@ -379,6 +383,12 @@ class DBImpl : public DB {
|
|||||||
SequenceNumber in,
|
SequenceNumber in,
|
||||||
std::vector<SequenceNumber>& snapshots,
|
std::vector<SequenceNumber>& snapshots,
|
||||||
SequenceNumber* prev_snapshot);
|
SequenceNumber* prev_snapshot);
|
||||||
|
|
||||||
|
// Function that Get and KeyMayExist call with no_IO true or false
|
||||||
|
Status GetImpl(const ReadOptions& options,
|
||||||
|
const Slice& key,
|
||||||
|
std::string* value,
|
||||||
|
const bool no_IO = false);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Sanitize db options. The caller should delete result.info_log if
|
// Sanitize db options. The caller should delete result.info_log if
|
||||||
|
@ -218,6 +218,7 @@ class DBTest {
|
|||||||
kManifestFileSize,
|
kManifestFileSize,
|
||||||
kCompactOnFlush,
|
kCompactOnFlush,
|
||||||
kPerfOptions,
|
kPerfOptions,
|
||||||
|
kDeletesFilterFirst,
|
||||||
kEnd
|
kEnd
|
||||||
};
|
};
|
||||||
int option_config_;
|
int option_config_;
|
||||||
@ -289,6 +290,9 @@ class DBTest {
|
|||||||
options.rate_limit_delay_milliseconds = 2;
|
options.rate_limit_delay_milliseconds = 2;
|
||||||
// TODO -- test more options
|
// TODO -- test more options
|
||||||
break;
|
break;
|
||||||
|
case kDeletesFilterFirst:
|
||||||
|
options.deletes_check_filter_first = true;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -768,6 +772,37 @@ TEST(DBTest, GetEncountersEmptyLevel) {
|
|||||||
} while (ChangeOptions());
|
} while (ChangeOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// KeyMayExist-API returns false if memtable(s) and in-memory bloom-filters can
|
||||||
|
// guarantee that the key doesn't exist in the db, else true. This can lead to
|
||||||
|
// a few false positives, but not false negatives. To make test deterministic,
|
||||||
|
// use a much larger number of bits per key (20) than bits in the key, so
|
||||||
|
// that false positives are eliminated
|
||||||
|
TEST(DBTest, KeyMayExist) {
|
||||||
|
do {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.filter_policy = NewBloomFilterPolicy(20);
|
||||||
|
Reopen(&options);
|
||||||
|
|
||||||
|
ASSERT_TRUE(!db_->KeyMayExist("a"));
|
||||||
|
|
||||||
|
ASSERT_OK(db_->Put(WriteOptions(), "a", "b"));
|
||||||
|
ASSERT_TRUE(db_->KeyMayExist("a"));
|
||||||
|
|
||||||
|
dbfull()->Flush(FlushOptions());
|
||||||
|
ASSERT_TRUE(db_->KeyMayExist("a"));
|
||||||
|
|
||||||
|
ASSERT_OK(db_->Delete(WriteOptions(), "a"));
|
||||||
|
ASSERT_TRUE(!db_->KeyMayExist("a"));
|
||||||
|
|
||||||
|
dbfull()->Flush(FlushOptions());
|
||||||
|
dbfull()->CompactRange(nullptr, nullptr);
|
||||||
|
ASSERT_TRUE(!db_->KeyMayExist("a"));
|
||||||
|
|
||||||
|
ASSERT_OK(db_->Delete(WriteOptions(), "c"));
|
||||||
|
ASSERT_TRUE(!db_->KeyMayExist("c"));
|
||||||
|
} while (ChangeOptions());
|
||||||
|
}
|
||||||
|
|
||||||
TEST(DBTest, IterEmpty) {
|
TEST(DBTest, IterEmpty) {
|
||||||
Iterator* iter = db_->NewIterator(ReadOptions());
|
Iterator* iter = db_->NewIterator(ReadOptions());
|
||||||
|
|
||||||
@ -1403,7 +1438,7 @@ class DeleteFilter : public CompactionFilter {
|
|||||||
|
|
||||||
class ChangeFilter : public CompactionFilter {
|
class ChangeFilter : public CompactionFilter {
|
||||||
public:
|
public:
|
||||||
ChangeFilter(int argv) : argv_(argv) {}
|
explicit ChangeFilter(int argv) : argv_(argv) {}
|
||||||
|
|
||||||
virtual bool Filter(int level, const Slice& key,
|
virtual bool Filter(int level, const Slice& key,
|
||||||
const Slice& value, std::string* new_value,
|
const Slice& value, std::string* new_value,
|
||||||
@ -2970,6 +3005,9 @@ class ModelDB: public DB {
|
|||||||
Status::NotSupported("Not implemented."));
|
Status::NotSupported("Not implemented."));
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
virtual bool KeyMayExist(const Slice& key) {
|
||||||
|
return true; // Not Supported directly
|
||||||
|
}
|
||||||
virtual Iterator* NewIterator(const ReadOptions& options) {
|
virtual Iterator* NewIterator(const ReadOptions& options) {
|
||||||
if (options.snapshot == nullptr) {
|
if (options.snapshot == nullptr) {
|
||||||
KVMap* saved = new KVMap;
|
KVMap* saved = new KVMap;
|
||||||
|
@ -119,7 +119,7 @@ void MemTable::Add(SequenceNumber s, ValueType type,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
||||||
const Options& options) {
|
const Options& options, const bool check_presence_only) {
|
||||||
Slice memkey = key.memtable_key();
|
Slice memkey = key.memtable_key();
|
||||||
Table::Iterator iter(&table_);
|
Table::Iterator iter(&table_);
|
||||||
iter.Seek(memkey.data());
|
iter.Seek(memkey.data());
|
||||||
@ -164,6 +164,10 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
case kTypeMerge: {
|
case kTypeMerge: {
|
||||||
|
if (check_presence_only) {
|
||||||
|
*s = Status::OK();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
|
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
|
||||||
if (merge_in_progress) {
|
if (merge_in_progress) {
|
||||||
merge_operator->Merge(key.user_key(), &v, operand,
|
merge_operator->Merge(key.user_key(), &v, operand,
|
||||||
|
@ -63,12 +63,13 @@ class MemTable {
|
|||||||
// If memtable contains a deletion for key, store a NotFound() error
|
// If memtable contains a deletion for key, store a NotFound() error
|
||||||
// in *status and return true.
|
// in *status and return true.
|
||||||
// If memtable contains Merge operation as the most recent entry for a key,
|
// If memtable contains Merge operation as the most recent entry for a key,
|
||||||
// and the merge process does not stop (not reaching a value or delete),
|
// and if check_presence_only is set, return true with Status::OK,
|
||||||
|
// else if the merge process does not stop (not reaching a value or delete),
|
||||||
// store the current merged result in value and MergeInProgress in s.
|
// store the current merged result in value and MergeInProgress in s.
|
||||||
// return false
|
// return false
|
||||||
// Else, return false.
|
// Else, return false.
|
||||||
bool Get(const LookupKey& key, std::string* value, Status* s,
|
bool Get(const LookupKey& key, std::string* value, Status* s,
|
||||||
const Options& options);
|
const Options& options, const bool check_presence_only = false);
|
||||||
|
|
||||||
// Returns the edits area that is needed for flushing the memtable
|
// Returns the edits area that is needed for flushing the memtable
|
||||||
VersionEdit* GetEdits() { return &edit_; }
|
VersionEdit* GetEdits() { return &edit_; }
|
||||||
|
@ -194,10 +194,10 @@ size_t MemTableList::ApproximateMemoryUsage() {
|
|||||||
// Search all the memtables starting from the most recent one.
|
// Search all the memtables starting from the most recent one.
|
||||||
// Return the most recent value found, if any.
|
// Return the most recent value found, if any.
|
||||||
bool MemTableList::Get(const LookupKey& key, std::string* value, Status* s,
|
bool MemTableList::Get(const LookupKey& key, std::string* value, Status* s,
|
||||||
const Options& options ) {
|
const Options& options, const bool check_presence_only) {
|
||||||
for (list<MemTable*>::iterator it = memlist_.begin();
|
for (list<MemTable*>::iterator it = memlist_.begin();
|
||||||
it != memlist_.end(); ++it) {
|
it != memlist_.end(); ++it) {
|
||||||
if ((*it)->Get(key, value, s, options)) {
|
if ((*it)->Get(key, value, s, options, check_presence_only)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -71,7 +71,7 @@ class MemTableList {
|
|||||||
// Search all the memtables starting from the most recent one.
|
// Search all the memtables starting from the most recent one.
|
||||||
// Return the most recent value found, if any.
|
// Return the most recent value found, if any.
|
||||||
bool Get(const LookupKey& key, std::string* value, Status* s,
|
bool Get(const LookupKey& key, std::string* value, Status* s,
|
||||||
const Options& options);
|
const Options& options, const bool check_presence_only = false);
|
||||||
|
|
||||||
// Returns the list of underlying memtables.
|
// Returns the list of underlying memtables.
|
||||||
void GetMemTables(std::vector<MemTable*>* list);
|
void GetMemTables(std::vector<MemTable*>* list);
|
||||||
|
@ -112,14 +112,16 @@ Status TableCache::Get(const ReadOptions& options,
|
|||||||
const Slice& k,
|
const Slice& k,
|
||||||
void* arg,
|
void* arg,
|
||||||
bool (*saver)(void*, const Slice&, const Slice&, bool),
|
bool (*saver)(void*, const Slice&, const Slice&, bool),
|
||||||
bool* tableIO) {
|
bool* tableIO,
|
||||||
|
void (*mark_key_may_exist)(void*),
|
||||||
|
const bool no_IO) {
|
||||||
Cache::Handle* handle = nullptr;
|
Cache::Handle* handle = nullptr;
|
||||||
Status s = FindTable(storage_options_, file_number, file_size,
|
Status s = FindTable(storage_options_, file_number, file_size,
|
||||||
&handle, tableIO);
|
&handle, tableIO);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
Table* t =
|
Table* t =
|
||||||
reinterpret_cast<Table*>(cache_->Value(handle));
|
reinterpret_cast<Table*>(cache_->Value(handle));
|
||||||
s = t->InternalGet(options, k, arg, saver);
|
s = t->InternalGet(options, k, arg, saver, mark_key_may_exist, no_IO);
|
||||||
cache_->Release(handle);
|
cache_->Release(handle);
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
|
@ -48,7 +48,9 @@ class TableCache {
|
|||||||
const Slice& k,
|
const Slice& k,
|
||||||
void* arg,
|
void* arg,
|
||||||
bool (*handle_result)(void*, const Slice&, const Slice&, bool),
|
bool (*handle_result)(void*, const Slice&, const Slice&, bool),
|
||||||
bool* tableIO);
|
bool* tableIO,
|
||||||
|
void (*mark_key_may_exist)(void*) = nullptr,
|
||||||
|
const bool no_IO = false);
|
||||||
|
|
||||||
// Evict any entry for the specified file number
|
// Evict any entry for the specified file number
|
||||||
void Evict(uint64_t file_number);
|
void Evict(uint64_t file_number);
|
||||||
|
@ -244,6 +244,16 @@ struct Saver {
|
|||||||
bool didIO; // did we do any disk io?
|
bool didIO; // did we do any disk io?
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Called from TableCache::Get when bloom-filters can't guarantee that key does
|
||||||
|
// not exist and Get is not permitted to do IO to read the data-block and be
|
||||||
|
// certain.
|
||||||
|
// Set the key as Found and let the caller know that key-may-exist
|
||||||
|
static void MarkKeyMayExist(void* arg) {
|
||||||
|
Saver* s = reinterpret_cast<Saver*>(arg);
|
||||||
|
s->state = kFound;
|
||||||
|
}
|
||||||
|
|
||||||
static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){
|
static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){
|
||||||
Saver* s = reinterpret_cast<Saver*>(arg);
|
Saver* s = reinterpret_cast<Saver*>(arg);
|
||||||
ParsedInternalKey parsed_key;
|
ParsedInternalKey parsed_key;
|
||||||
@ -328,7 +338,8 @@ void Version::Get(const ReadOptions& options,
|
|||||||
std::string* value,
|
std::string* value,
|
||||||
Status *status,
|
Status *status,
|
||||||
GetStats* stats,
|
GetStats* stats,
|
||||||
const Options& db_options) {
|
const Options& db_options,
|
||||||
|
const bool no_IO) {
|
||||||
Slice ikey = k.internal_key();
|
Slice ikey = k.internal_key();
|
||||||
Slice user_key = k.user_key();
|
Slice user_key = k.user_key();
|
||||||
const Comparator* ucmp = vset_->icmp_.user_comparator();
|
const Comparator* ucmp = vset_->icmp_.user_comparator();
|
||||||
@ -337,6 +348,9 @@ void Version::Get(const ReadOptions& options,
|
|||||||
auto logger = db_options.info_log;
|
auto logger = db_options.info_log;
|
||||||
|
|
||||||
assert(status->ok() || status->IsMergeInProgress());
|
assert(status->ok() || status->IsMergeInProgress());
|
||||||
|
if (no_IO) {
|
||||||
|
assert(status->ok());
|
||||||
|
}
|
||||||
Saver saver;
|
Saver saver;
|
||||||
saver.state = status->ok()? kNotFound : kMerge;
|
saver.state = status->ok()? kNotFound : kMerge;
|
||||||
saver.ucmp = ucmp;
|
saver.ucmp = ucmp;
|
||||||
@ -404,7 +418,8 @@ void Version::Get(const ReadOptions& options,
|
|||||||
FileMetaData* f = files[i];
|
FileMetaData* f = files[i];
|
||||||
bool tableIO = false;
|
bool tableIO = false;
|
||||||
*status = vset_->table_cache_->Get(options, f->number, f->file_size,
|
*status = vset_->table_cache_->Get(options, f->number, f->file_size,
|
||||||
ikey, &saver, SaveValue, &tableIO);
|
ikey, &saver, SaveValue, &tableIO,
|
||||||
|
MarkKeyMayExist, no_IO);
|
||||||
// TODO: examine the behavior for corrupted key
|
// TODO: examine the behavior for corrupted key
|
||||||
if (!status->ok()) {
|
if (!status->ok()) {
|
||||||
return;
|
return;
|
||||||
|
@ -74,7 +74,8 @@ class Version {
|
|||||||
int seek_file_level;
|
int seek_file_level;
|
||||||
};
|
};
|
||||||
void Get(const ReadOptions&, const LookupKey& key, std::string* val,
|
void Get(const ReadOptions&, const LookupKey& key, std::string* val,
|
||||||
Status* status, GetStats* stats, const Options& db_option);
|
Status* status, GetStats* stats, const Options& db_option,
|
||||||
|
const bool no_IO = false);
|
||||||
|
|
||||||
// Adds "stats" into the current state. Returns true if a new
|
// Adds "stats" into the current state. Returns true if a new
|
||||||
// compaction may need to be triggered, false otherwise.
|
// compaction may need to be triggered, false otherwise.
|
||||||
|
@ -120,6 +120,11 @@ class DB {
|
|||||||
const std::vector<Slice>& keys,
|
const std::vector<Slice>& keys,
|
||||||
std::vector<std::string>* values) = 0;
|
std::vector<std::string>* values) = 0;
|
||||||
|
|
||||||
|
// If the key definitely does not exist in the database, then this method
|
||||||
|
// returns false. Otherwise return true. This check is potentially
|
||||||
|
// lighter-weight than invoking DB::Get(). No IO is performed
|
||||||
|
virtual bool KeyMayExist(const Slice& key) = 0;
|
||||||
|
|
||||||
// Return a heap-allocated iterator over the contents of the database.
|
// Return a heap-allocated iterator over the contents of the database.
|
||||||
// The result of NewIterator() is initially invalid (caller must
|
// The result of NewIterator() is initially invalid (caller must
|
||||||
// call one of the Seek methods on the iterator before using it).
|
// call one of the Seek methods on the iterator before using it).
|
||||||
|
@ -465,6 +465,15 @@ struct Options {
|
|||||||
// Default: 0
|
// Default: 0
|
||||||
uint64_t bytes_per_sync;
|
uint64_t bytes_per_sync;
|
||||||
|
|
||||||
|
// Use bloom-filter for deletes when this is true.
|
||||||
|
// db->Delete first calls KeyMayExist which checks memtable, immutable-memtable
|
||||||
|
// and bloom-filters to determine if the key does not exist in the database.
|
||||||
|
// If the key definitely does not exist, then the delete is a noop. KeyMayExist
|
||||||
|
// only incurs in-memory look up. This optimization avoids writing the delete
|
||||||
|
// to storage when appropriate.
|
||||||
|
// Default: false
|
||||||
|
bool deletes_check_filter_first;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Options that control read operations
|
// Options that control read operations
|
||||||
|
@ -58,7 +58,9 @@ enum Tickers {
|
|||||||
NUMBER_MULTIGET_KEYS_READ = 19,
|
NUMBER_MULTIGET_KEYS_READ = 19,
|
||||||
NUMBER_MULTIGET_BYTES_READ = 20,
|
NUMBER_MULTIGET_BYTES_READ = 20,
|
||||||
|
|
||||||
TICKER_ENUM_MAX = 21
|
NUMBER_FILTERED_DELETES = 21,
|
||||||
|
|
||||||
|
TICKER_ENUM_MAX = 22
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
||||||
@ -82,7 +84,8 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
|||||||
{ NO_ITERATORS, "rocksdb.num.iterators" },
|
{ NO_ITERATORS, "rocksdb.num.iterators" },
|
||||||
{ NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get" },
|
{ NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get" },
|
||||||
{ NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read" },
|
{ NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read" },
|
||||||
{ NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read" }
|
{ NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read" },
|
||||||
|
{ NUMBER_FILTERED_DELETES, "rocksdb.number.deletes.filtered" }
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -322,7 +322,9 @@ Iterator* Table::NewIterator(const ReadOptions& options) const {
|
|||||||
Status Table::InternalGet(const ReadOptions& options, const Slice& k,
|
Status Table::InternalGet(const ReadOptions& options, const Slice& k,
|
||||||
void* arg,
|
void* arg,
|
||||||
bool (*saver)(void*, const Slice&, const Slice&,
|
bool (*saver)(void*, const Slice&, const Slice&,
|
||||||
bool)) {
|
bool),
|
||||||
|
void (*mark_key_may_exist)(void*),
|
||||||
|
const bool no_IO) {
|
||||||
Status s;
|
Status s;
|
||||||
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
|
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
|
||||||
bool done = false;
|
bool done = false;
|
||||||
@ -338,6 +340,11 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k,
|
|||||||
// cross one data block, we should be fine.
|
// cross one data block, we should be fine.
|
||||||
RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL);
|
RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL);
|
||||||
break;
|
break;
|
||||||
|
} else if (no_IO) {
|
||||||
|
// Update Saver.state to Found because we are only looking for whether
|
||||||
|
// bloom-filter can guarantee the key is not there when "no_IO"
|
||||||
|
(*mark_key_may_exist)(arg);
|
||||||
|
done = true;
|
||||||
} else {
|
} else {
|
||||||
bool didIO = false;
|
bool didIO = false;
|
||||||
Iterator* block_iter = BlockReader(this, options, iiter->value(),
|
Iterator* block_iter = BlockReader(this, options, iiter->value(),
|
||||||
|
@ -86,7 +86,9 @@ class Table {
|
|||||||
Status InternalGet(
|
Status InternalGet(
|
||||||
const ReadOptions&, const Slice& key,
|
const ReadOptions&, const Slice& key,
|
||||||
void* arg,
|
void* arg,
|
||||||
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool));
|
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
|
||||||
|
void (*mark_key_may_exist)(void*) = nullptr,
|
||||||
|
const bool no_IO = false);
|
||||||
|
|
||||||
|
|
||||||
void ReadMeta(const Footer& footer);
|
void ReadMeta(const Footer& footer);
|
||||||
|
@ -180,6 +180,9 @@ static uint32_t FLAGS_log2_keys_per_lock = 2; // implies 2^2 keys per lock
|
|||||||
// Percentage of times we want to purge redundant keys in memory before flushing
|
// Percentage of times we want to purge redundant keys in memory before flushing
|
||||||
static uint32_t FLAGS_purge_redundant_percent = 50;
|
static uint32_t FLAGS_purge_redundant_percent = 50;
|
||||||
|
|
||||||
|
// On true, deletes use bloom-filter and drop the delete if key not present
|
||||||
|
static bool FLAGS_deletes_check_filter_first = false;
|
||||||
|
|
||||||
// Level0 compaction start trigger
|
// Level0 compaction start trigger
|
||||||
static int FLAGS_level0_file_num_compaction_trigger = 0;
|
static int FLAGS_level0_file_num_compaction_trigger = 0;
|
||||||
|
|
||||||
@ -900,6 +903,8 @@ class StressTest {
|
|||||||
FLAGS_test_batches_snapshots);
|
FLAGS_test_batches_snapshots);
|
||||||
fprintf(stdout, "Purge redundant %% : %d\n",
|
fprintf(stdout, "Purge redundant %% : %d\n",
|
||||||
FLAGS_purge_redundant_percent);
|
FLAGS_purge_redundant_percent);
|
||||||
|
fprintf(stdout, "Deletes use filter : %d\n",
|
||||||
|
FLAGS_deletes_check_filter_first);
|
||||||
fprintf(stdout, "Num keys per lock : %d\n",
|
fprintf(stdout, "Num keys per lock : %d\n",
|
||||||
1 << FLAGS_log2_keys_per_lock);
|
1 << FLAGS_log2_keys_per_lock);
|
||||||
|
|
||||||
@ -955,6 +960,7 @@ class StressTest {
|
|||||||
options.delete_obsolete_files_period_micros =
|
options.delete_obsolete_files_period_micros =
|
||||||
FLAGS_delete_obsolete_files_period_micros;
|
FLAGS_delete_obsolete_files_period_micros;
|
||||||
options.max_manifest_file_size = 1024;
|
options.max_manifest_file_size = 1024;
|
||||||
|
options.deletes_check_filter_first = FLAGS_deletes_check_filter_first;
|
||||||
static Random purge_percent(1000); // no benefit from non-determinism here
|
static Random purge_percent(1000); // no benefit from non-determinism here
|
||||||
if (purge_percent.Uniform(100) < FLAGS_purge_redundant_percent - 1) {
|
if (purge_percent.Uniform(100) < FLAGS_purge_redundant_percent - 1) {
|
||||||
options.purge_redundant_kvs_while_flush = false;
|
options.purge_redundant_kvs_while_flush = false;
|
||||||
@ -1154,6 +1160,9 @@ int main(int argc, char** argv) {
|
|||||||
} else if (sscanf(argv[i], "--purge_redundant_percent=%d%c", &n, &junk) == 1
|
} else if (sscanf(argv[i], "--purge_redundant_percent=%d%c", &n, &junk) == 1
|
||||||
&& (n >= 0 && n <= 100)) {
|
&& (n >= 0 && n <= 100)) {
|
||||||
FLAGS_purge_redundant_percent = n;
|
FLAGS_purge_redundant_percent = n;
|
||||||
|
} else if (sscanf(argv[i], "--deletes_check_filter_first=%d%c", &n, &junk)
|
||||||
|
== 1 && (n == 0 || n == 1)) {
|
||||||
|
FLAGS_deletes_check_filter_first = n;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
||||||
exit(1);
|
exit(1);
|
||||||
|
@ -74,7 +74,8 @@ Options::Options()
|
|||||||
advise_random_on_open(true),
|
advise_random_on_open(true),
|
||||||
access_hint_on_compaction_start(NORMAL),
|
access_hint_on_compaction_start(NORMAL),
|
||||||
use_adaptive_mutex(false),
|
use_adaptive_mutex(false),
|
||||||
bytes_per_sync(0) {
|
bytes_per_sync(0),
|
||||||
|
deletes_check_filter_first(false) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char* const access_hints[] = {
|
static const char* const access_hints[] = {
|
||||||
@ -208,6 +209,8 @@ Options::Dump(Logger* log) const
|
|||||||
use_adaptive_mutex);
|
use_adaptive_mutex);
|
||||||
Log(log," Options.bytes_per_sync: %ld",
|
Log(log," Options.bytes_per_sync: %ld",
|
||||||
bytes_per_sync);
|
bytes_per_sync);
|
||||||
|
Log(log," Options.deletes_check_filter_first: %d",
|
||||||
|
deletes_check_filter_first);
|
||||||
} // Options::Dump
|
} // Options::Dump
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -158,6 +158,10 @@ std::vector<Status> DBWithTTL::MultiGet(const ReadOptions& options,
|
|||||||
supported with TTL"));
|
supported with TTL"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DBWithTTL::KeyMayExist(const Slice& key) {
|
||||||
|
return db_->KeyMayExist(key);
|
||||||
|
}
|
||||||
|
|
||||||
Status DBWithTTL::Delete(const WriteOptions& wopts, const Slice& key) {
|
Status DBWithTTL::Delete(const WriteOptions& wopts, const Slice& key) {
|
||||||
return db_->Delete(wopts, key);
|
return db_->Delete(wopts, key);
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,8 @@ class DBWithTTL : public DB, CompactionFilter {
|
|||||||
const std::vector<Slice>& keys,
|
const std::vector<Slice>& keys,
|
||||||
std::vector<std::string>* values);
|
std::vector<std::string>* values);
|
||||||
|
|
||||||
|
virtual bool KeyMayExist(const Slice& key);
|
||||||
|
|
||||||
virtual Status Delete(const WriteOptions& wopts, const Slice& key);
|
virtual Status Delete(const WriteOptions& wopts, const Slice& key);
|
||||||
|
|
||||||
virtual Status Merge(const WriteOptions& options,
|
virtual Status Merge(const WriteOptions& options,
|
||||||
|
Loading…
Reference in New Issue
Block a user