Use KeyMayExist for WriteBatch-Deletes
Summary: Introduced KeyMayExist checking during writebatch-delete and removed from Outer Delete API because it uses writebatch-delete. Added code to skip getting Table from disk if not already present in table_cache. Some renaming of variables. Introduced KeyMayExistImpl which allows checking since specified sequence number in GetImpl useful to check partially written writebatch. Changed KeyMayExist to not be pure virtual and provided a default implementation. Expanded unit-tests in db_test to check appropriately. Ran db_stress for 1 hour with ./db_stress --max_key=100000 --ops_per_thread=10000000 --delpercent=50 --filter_deletes=1 --statistics=1. Test Plan: db_stress;make check Reviewers: dhruba, haobo Reviewed By: dhruba CC: leveldb, xjin Differential Revision: https://reviews.facebook.net/D11745
This commit is contained in:
parent
d364eea1fc
commit
bf66c10b13
@ -328,7 +328,7 @@ static auto FLAGS_bytes_per_sync =
|
|||||||
leveldb::Options().bytes_per_sync;
|
leveldb::Options().bytes_per_sync;
|
||||||
|
|
||||||
// On true, deletes use bloom-filter and drop the delete if key not present
|
// On true, deletes use bloom-filter and drop the delete if key not present
|
||||||
static bool FLAGS_deletes_check_filter_first = false;
|
static bool FLAGS_filter_deletes = false;
|
||||||
|
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
|
|
||||||
@ -1114,7 +1114,7 @@ unique_ptr<char []> GenerateKeyFromInt(int v, const char* suffix = "")
|
|||||||
options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base;
|
options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base;
|
||||||
options.max_bytes_for_level_multiplier =
|
options.max_bytes_for_level_multiplier =
|
||||||
FLAGS_max_bytes_for_level_multiplier;
|
FLAGS_max_bytes_for_level_multiplier;
|
||||||
options.deletes_check_filter_first = FLAGS_deletes_check_filter_first;
|
options.filter_deletes = FLAGS_filter_deletes;
|
||||||
if (FLAGS_max_bytes_for_level_multiplier_additional.size() > 0) {
|
if (FLAGS_max_bytes_for_level_multiplier_additional.size() > 0) {
|
||||||
if (FLAGS_max_bytes_for_level_multiplier_additional.size() !=
|
if (FLAGS_max_bytes_for_level_multiplier_additional.size() !=
|
||||||
(unsigned int)FLAGS_num_levels) {
|
(unsigned int)FLAGS_num_levels) {
|
||||||
@ -2220,9 +2220,9 @@ int main(int argc, char** argv) {
|
|||||||
FLAGS_keys_per_multiget = n;
|
FLAGS_keys_per_multiget = n;
|
||||||
} else if (sscanf(argv[i], "--bytes_per_sync=%ld%c", &l, &junk) == 1) {
|
} else if (sscanf(argv[i], "--bytes_per_sync=%ld%c", &l, &junk) == 1) {
|
||||||
FLAGS_bytes_per_sync = l;
|
FLAGS_bytes_per_sync = l;
|
||||||
} else if (sscanf(argv[i], "--deletes_check_filter_first=%d%c", &n, &junk)
|
} else if (sscanf(argv[i], "--filter_deletes=%d%c", &n, &junk)
|
||||||
== 1 && (n == 0 || n ==1 )) {
|
== 1 && (n == 0 || n ==1 )) {
|
||||||
FLAGS_deletes_check_filter_first = n;
|
FLAGS_filter_deletes = n;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
||||||
exit(1);
|
exit(1);
|
||||||
|
@ -691,7 +691,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
|
|||||||
mem = new MemTable(internal_comparator_, NumberLevels());
|
mem = new MemTable(internal_comparator_, NumberLevels());
|
||||||
mem->Ref();
|
mem->Ref();
|
||||||
}
|
}
|
||||||
status = WriteBatchInternal::InsertInto(&batch, mem);
|
status = WriteBatchInternal::InsertInto(&batch, mem, &options_);
|
||||||
MaybeIgnoreError(&status);
|
MaybeIgnoreError(&status);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
break;
|
break;
|
||||||
@ -2078,13 +2078,13 @@ Status DBImpl::Get(const ReadOptions& options,
|
|||||||
return GetImpl(options, key, value);
|
return GetImpl(options, key, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no_IO is true, then returns Status::NotFound if key is not in memtable,
|
// If no_io is true, then returns Status::NotFound if key is not in memtable,
|
||||||
// immutable-memtable and bloom-filters can guarantee that key is not in db,
|
// immutable-memtable and bloom-filters can guarantee that key is not in db,
|
||||||
// "value" is garbage string if no_IO is true
|
// "value" is garbage string if no_io is true
|
||||||
Status DBImpl::GetImpl(const ReadOptions& options,
|
Status DBImpl::GetImpl(const ReadOptions& options,
|
||||||
const Slice& key,
|
const Slice& key,
|
||||||
std::string* value,
|
std::string* value,
|
||||||
const bool no_IO) {
|
const bool no_io) {
|
||||||
Status s;
|
Status s;
|
||||||
|
|
||||||
StopWatch sw(env_, options_.statistics, DB_GET);
|
StopWatch sw(env_, options_.statistics, DB_GET);
|
||||||
@ -2113,12 +2113,12 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
|||||||
// s is both in/out. When in, s could either be OK or MergeInProgress.
|
// s is both in/out. When in, s could either be OK or MergeInProgress.
|
||||||
// value will contain the current merge operand in the latter case.
|
// value will contain the current merge operand in the latter case.
|
||||||
LookupKey lkey(key, snapshot);
|
LookupKey lkey(key, snapshot);
|
||||||
if (mem->Get(lkey, value, &s, options_, no_IO)) {
|
if (mem->Get(lkey, value, &s, options_, no_io)) {
|
||||||
// Done
|
// Done
|
||||||
} else if (imm.Get(lkey, value, &s, options_, no_IO)) {
|
} else if (imm.Get(lkey, value, &s, options_, no_io)) {
|
||||||
// Done
|
// Done
|
||||||
} else {
|
} else {
|
||||||
current->Get(options, lkey, value, &s, &stats, options_, no_IO);
|
current->Get(options, lkey, value, &s, &stats, options_, no_io);
|
||||||
have_stat_update = true;
|
have_stat_update = true;
|
||||||
}
|
}
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
@ -2209,8 +2209,17 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool DBImpl::KeyMayExist(const Slice& key) {
|
bool DBImpl::KeyMayExist(const Slice& key) {
|
||||||
|
return KeyMayExistImpl(key, versions_->LastSequence());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool DBImpl::KeyMayExistImpl(const Slice& key,
|
||||||
|
const SequenceNumber read_from_seq) {
|
||||||
std::string value;
|
std::string value;
|
||||||
const Status s = GetImpl(ReadOptions(), key, &value, true);
|
SnapshotImpl read_from_snapshot;
|
||||||
|
read_from_snapshot.number_ = read_from_seq;
|
||||||
|
ReadOptions ropts;
|
||||||
|
ropts.snapshot = &read_from_snapshot;
|
||||||
|
const Status s = GetImpl(ropts, key, &value, true);
|
||||||
return !s.IsNotFound();
|
return !s.IsNotFound();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2249,10 +2258,6 @@ Status DBImpl::Merge(const WriteOptions& o, const Slice& key,
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
||||||
if (options_.deletes_check_filter_first && !KeyMayExist(key)) {
|
|
||||||
RecordTick(options_.statistics, NUMBER_FILTERED_DELETES);
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
return DB::Delete(options, key);
|
return DB::Delete(options, key);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2311,7 +2316,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
status = WriteBatchInternal::InsertInto(updates, mem_);
|
status = WriteBatchInternal::InsertInto(updates, mem_, &options_, this,
|
||||||
|
options_.filter_deletes);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
// Panic for in-memory corruptions
|
// Panic for in-memory corruptions
|
||||||
// Note that existing logic was not sound. Any partial failure writing
|
// Note that existing logic was not sound. Any partial failure writing
|
||||||
|
13
db/db_impl.h
13
db/db_impl.h
@ -103,6 +103,15 @@ class DBImpl : public DB {
|
|||||||
// Trigger's a background call for testing.
|
// Trigger's a background call for testing.
|
||||||
void TEST_PurgeObsoleteteWAL();
|
void TEST_PurgeObsoleteteWAL();
|
||||||
|
|
||||||
|
// KeyMayExist's internal function, but can be called internally from rocksdb
|
||||||
|
// to check memtable from sequence_number=read_from_seq. This is useful to
|
||||||
|
// check presence of key in db when key's existence is to be also checked in
|
||||||
|
// an incompletely written WriteBatch in memtable. eg. Database doesn't have
|
||||||
|
// key A and WriteBatch=[PutA,B; DelA]. A KeyMayExist called from DelA also
|
||||||
|
// needs to check itself for any PutA to be sure to not drop the delete.
|
||||||
|
bool KeyMayExistImpl(const Slice& key,
|
||||||
|
const SequenceNumber read_from_seq);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Env* const env_;
|
Env* const env_;
|
||||||
const std::string dbname_;
|
const std::string dbname_;
|
||||||
@ -399,11 +408,11 @@ class DBImpl : public DB {
|
|||||||
std::vector<SequenceNumber>& snapshots,
|
std::vector<SequenceNumber>& snapshots,
|
||||||
SequenceNumber* prev_snapshot);
|
SequenceNumber* prev_snapshot);
|
||||||
|
|
||||||
// Function that Get and KeyMayExist call with no_IO true or false
|
// Function that Get and KeyMayExistImpl call with no_io true or false
|
||||||
Status GetImpl(const ReadOptions& options,
|
Status GetImpl(const ReadOptions& options,
|
||||||
const Slice& key,
|
const Slice& key,
|
||||||
std::string* value,
|
std::string* value,
|
||||||
const bool no_IO = false);
|
const bool no_io = false);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Sanitize db options. The caller should delete result.info_log if
|
// Sanitize db options. The caller should delete result.info_log if
|
||||||
|
@ -291,7 +291,7 @@ class DBTest {
|
|||||||
// TODO -- test more options
|
// TODO -- test more options
|
||||||
break;
|
break;
|
||||||
case kDeletesFilterFirst:
|
case kDeletesFilterFirst:
|
||||||
options.deletes_check_filter_first = true;
|
options.filter_deletes = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
@ -805,6 +805,44 @@ TEST(DBTest, KeyMayExist) {
|
|||||||
} while (ChangeOptions());
|
} while (ChangeOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A delete is skipped for key if KeyMayExist(key) returns False
|
||||||
|
// Tests Writebatch consistency and proper delete behaviour
|
||||||
|
TEST(DBTest, FilterDeletes) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.filter_policy = NewBloomFilterPolicy(20);
|
||||||
|
options.filter_deletes = true;
|
||||||
|
Reopen(&options);
|
||||||
|
WriteBatch batch;
|
||||||
|
|
||||||
|
batch.Delete("a");
|
||||||
|
dbfull()->Write(WriteOptions(), &batch);
|
||||||
|
ASSERT_EQ(AllEntriesFor("a"), "[ ]"); // Delete skipped
|
||||||
|
batch.Clear();
|
||||||
|
|
||||||
|
batch.Put("a", "b");
|
||||||
|
batch.Delete("a");
|
||||||
|
dbfull()->Write(WriteOptions(), &batch);
|
||||||
|
ASSERT_EQ(Get("a"), "NOT_FOUND");
|
||||||
|
ASSERT_EQ(AllEntriesFor("a"), "[ DEL, b ]"); // Delete issued
|
||||||
|
batch.Clear();
|
||||||
|
|
||||||
|
batch.Delete("c");
|
||||||
|
batch.Put("c", "d");
|
||||||
|
dbfull()->Write(WriteOptions(), &batch);
|
||||||
|
ASSERT_EQ(Get("c"), "d");
|
||||||
|
ASSERT_EQ(AllEntriesFor("c"), "[ d ]"); // Delete skipped
|
||||||
|
batch.Clear();
|
||||||
|
|
||||||
|
dbfull()->Flush(FlushOptions()); // A stray Flush
|
||||||
|
|
||||||
|
batch.Delete("c");
|
||||||
|
dbfull()->Write(WriteOptions(), &batch);
|
||||||
|
ASSERT_EQ(AllEntriesFor("c"), "[ DEL, d ]"); // Delete issued
|
||||||
|
batch.Clear();
|
||||||
|
|
||||||
|
delete options.filter_policy;
|
||||||
|
}
|
||||||
|
|
||||||
TEST(DBTest, IterEmpty) {
|
TEST(DBTest, IterEmpty) {
|
||||||
Iterator* iter = db_->NewIterator(ReadOptions());
|
Iterator* iter = db_->NewIterator(ReadOptions());
|
||||||
|
|
||||||
@ -3192,6 +3230,9 @@ static bool CompareIterators(int step,
|
|||||||
TEST(DBTest, Randomized) {
|
TEST(DBTest, Randomized) {
|
||||||
Random rnd(test::RandomSeed());
|
Random rnd(test::RandomSeed());
|
||||||
do {
|
do {
|
||||||
|
if (CurrentOptions().filter_deletes) {
|
||||||
|
ChangeOptions(); // DBTest.Randomized not suited for filter_deletes
|
||||||
|
}
|
||||||
ModelDB model(CurrentOptions());
|
ModelDB model(CurrentOptions());
|
||||||
const int N = 10000;
|
const int N = 10000;
|
||||||
const Snapshot* model_snap = nullptr;
|
const Snapshot* model_snap = nullptr;
|
||||||
|
@ -39,15 +39,19 @@ TableCache::~TableCache() {
|
|||||||
|
|
||||||
Status TableCache::FindTable(const EnvOptions& toptions,
|
Status TableCache::FindTable(const EnvOptions& toptions,
|
||||||
uint64_t file_number, uint64_t file_size,
|
uint64_t file_number, uint64_t file_size,
|
||||||
Cache::Handle** handle, bool* tableIO) {
|
Cache::Handle** handle, bool* table_io,
|
||||||
|
const bool no_io) {
|
||||||
Status s;
|
Status s;
|
||||||
char buf[sizeof(file_number)];
|
char buf[sizeof(file_number)];
|
||||||
EncodeFixed64(buf, file_number);
|
EncodeFixed64(buf, file_number);
|
||||||
Slice key(buf, sizeof(buf));
|
Slice key(buf, sizeof(buf));
|
||||||
*handle = cache_->Lookup(key);
|
*handle = cache_->Lookup(key);
|
||||||
if (*handle == nullptr) {
|
if (*handle == nullptr) {
|
||||||
if (tableIO != nullptr) {
|
if (no_io) { // Dont do IO and return a not-found status
|
||||||
*tableIO = true; // we had to do IO from storage
|
return Status::NotFound("Table not found in table_cache, no_io is set");
|
||||||
|
}
|
||||||
|
if (table_io != nullptr) {
|
||||||
|
*table_io = true; // we had to do IO from storage
|
||||||
}
|
}
|
||||||
std::string fname = TableFileName(dbname_, file_number);
|
std::string fname = TableFileName(dbname_, file_number);
|
||||||
unique_ptr<RandomAccessFile> file;
|
unique_ptr<RandomAccessFile> file;
|
||||||
@ -112,17 +116,21 @@ Status TableCache::Get(const ReadOptions& options,
|
|||||||
const Slice& k,
|
const Slice& k,
|
||||||
void* arg,
|
void* arg,
|
||||||
bool (*saver)(void*, const Slice&, const Slice&, bool),
|
bool (*saver)(void*, const Slice&, const Slice&, bool),
|
||||||
bool* tableIO,
|
bool* table_io,
|
||||||
void (*mark_key_may_exist)(void*),
|
void (*mark_key_may_exist)(void*),
|
||||||
const bool no_IO) {
|
const bool no_io) {
|
||||||
Cache::Handle* handle = nullptr;
|
Cache::Handle* handle = nullptr;
|
||||||
Status s = FindTable(storage_options_, file_number, file_size,
|
Status s = FindTable(storage_options_, file_number, file_size,
|
||||||
&handle, tableIO);
|
&handle, table_io, no_io);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
Table* t =
|
Table* t =
|
||||||
reinterpret_cast<Table*>(cache_->Value(handle));
|
reinterpret_cast<Table*>(cache_->Value(handle));
|
||||||
s = t->InternalGet(options, k, arg, saver, mark_key_may_exist, no_IO);
|
s = t->InternalGet(options, k, arg, saver, mark_key_may_exist, no_io);
|
||||||
cache_->Release(handle);
|
cache_->Release(handle);
|
||||||
|
} else if (no_io && s.IsNotFound()) {
|
||||||
|
// Couldnt find Table in cache but treat as kFound if no_io set
|
||||||
|
(*mark_key_may_exist)(arg);
|
||||||
|
return Status::OK();
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
@ -48,9 +48,9 @@ class TableCache {
|
|||||||
const Slice& k,
|
const Slice& k,
|
||||||
void* arg,
|
void* arg,
|
||||||
bool (*handle_result)(void*, const Slice&, const Slice&, bool),
|
bool (*handle_result)(void*, const Slice&, const Slice&, bool),
|
||||||
bool* tableIO,
|
bool* table_io,
|
||||||
void (*mark_key_may_exist)(void*) = nullptr,
|
void (*mark_key_may_exist)(void*) = nullptr,
|
||||||
const bool no_IO = false);
|
const bool no_io = false);
|
||||||
|
|
||||||
// Evict any entry for the specified file number
|
// Evict any entry for the specified file number
|
||||||
void Evict(uint64_t file_number);
|
void Evict(uint64_t file_number);
|
||||||
@ -62,9 +62,9 @@ class TableCache {
|
|||||||
const EnvOptions& storage_options_;
|
const EnvOptions& storage_options_;
|
||||||
std::shared_ptr<Cache> cache_;
|
std::shared_ptr<Cache> cache_;
|
||||||
|
|
||||||
Status FindTable(const EnvOptions& toptions,
|
Status FindTable(const EnvOptions& toptions, uint64_t file_number,
|
||||||
uint64_t file_number, uint64_t file_size, Cache::Handle**,
|
uint64_t file_size, Cache::Handle**, bool* table_io=nullptr,
|
||||||
bool* tableIO = nullptr);
|
const bool no_io = false);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace leveldb
|
} // namespace leveldb
|
||||||
|
@ -339,7 +339,7 @@ void Version::Get(const ReadOptions& options,
|
|||||||
Status *status,
|
Status *status,
|
||||||
GetStats* stats,
|
GetStats* stats,
|
||||||
const Options& db_options,
|
const Options& db_options,
|
||||||
const bool no_IO) {
|
const bool no_io) {
|
||||||
Slice ikey = k.internal_key();
|
Slice ikey = k.internal_key();
|
||||||
Slice user_key = k.user_key();
|
Slice user_key = k.user_key();
|
||||||
const Comparator* ucmp = vset_->icmp_.user_comparator();
|
const Comparator* ucmp = vset_->icmp_.user_comparator();
|
||||||
@ -348,7 +348,7 @@ void Version::Get(const ReadOptions& options,
|
|||||||
auto logger = db_options.info_log;
|
auto logger = db_options.info_log;
|
||||||
|
|
||||||
assert(status->ok() || status->IsMergeInProgress());
|
assert(status->ok() || status->IsMergeInProgress());
|
||||||
if (no_IO) {
|
if (no_io) {
|
||||||
assert(status->ok());
|
assert(status->ok());
|
||||||
}
|
}
|
||||||
Saver saver;
|
Saver saver;
|
||||||
@ -419,7 +419,7 @@ void Version::Get(const ReadOptions& options,
|
|||||||
bool tableIO = false;
|
bool tableIO = false;
|
||||||
*status = vset_->table_cache_->Get(options, f->number, f->file_size,
|
*status = vset_->table_cache_->Get(options, f->number, f->file_size,
|
||||||
ikey, &saver, SaveValue, &tableIO,
|
ikey, &saver, SaveValue, &tableIO,
|
||||||
MarkKeyMayExist, no_IO);
|
MarkKeyMayExist, no_io);
|
||||||
// TODO: examine the behavior for corrupted key
|
// TODO: examine the behavior for corrupted key
|
||||||
if (!status->ok()) {
|
if (!status->ok()) {
|
||||||
return;
|
return;
|
||||||
|
@ -75,7 +75,7 @@ class Version {
|
|||||||
};
|
};
|
||||||
void Get(const ReadOptions&, const LookupKey& key, std::string* val,
|
void Get(const ReadOptions&, const LookupKey& key, std::string* val,
|
||||||
Status* status, GetStats* stats, const Options& db_option,
|
Status* status, GetStats* stats, const Options& db_option,
|
||||||
const bool no_IO = false);
|
const bool no_io = false);
|
||||||
|
|
||||||
// Adds "stats" into the current state. Returns true if a new
|
// Adds "stats" into the current state. Returns true if a new
|
||||||
// compaction may need to be triggered, false otherwise.
|
// compaction may need to be triggered, false otherwise.
|
||||||
|
@ -16,8 +16,9 @@
|
|||||||
|
|
||||||
#include "leveldb/write_batch.h"
|
#include "leveldb/write_batch.h"
|
||||||
|
|
||||||
#include "leveldb/db.h"
|
#include "leveldb/statistics.h"
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
|
#include "db/db_impl.h"
|
||||||
#include "db/memtable.h"
|
#include "db/memtable.h"
|
||||||
#include "db/write_batch_internal.h"
|
#include "db/write_batch_internal.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
@ -139,6 +140,23 @@ class MemTableInserter : public WriteBatch::Handler {
|
|||||||
public:
|
public:
|
||||||
SequenceNumber sequence_;
|
SequenceNumber sequence_;
|
||||||
MemTable* mem_;
|
MemTable* mem_;
|
||||||
|
const Options* options_;
|
||||||
|
DBImpl* db_;
|
||||||
|
const bool filter_deletes_;
|
||||||
|
|
||||||
|
MemTableInserter(SequenceNumber sequence, MemTable* mem, const Options* opts,
|
||||||
|
DB* db, const bool filter_deletes)
|
||||||
|
: sequence_(sequence),
|
||||||
|
mem_(mem),
|
||||||
|
options_(opts),
|
||||||
|
db_(reinterpret_cast<DBImpl*>(db)),
|
||||||
|
filter_deletes_(filter_deletes) {
|
||||||
|
assert(mem_);
|
||||||
|
if (filter_deletes_) {
|
||||||
|
assert(options_);
|
||||||
|
assert(db_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
virtual void Put(const Slice& key, const Slice& value) {
|
virtual void Put(const Slice& key, const Slice& value) {
|
||||||
mem_->Add(sequence_, kTypeValue, key, value);
|
mem_->Add(sequence_, kTypeValue, key, value);
|
||||||
@ -149,17 +167,21 @@ class MemTableInserter : public WriteBatch::Handler {
|
|||||||
sequence_++;
|
sequence_++;
|
||||||
}
|
}
|
||||||
virtual void Delete(const Slice& key) {
|
virtual void Delete(const Slice& key) {
|
||||||
|
if (filter_deletes_ && !db_->KeyMayExistImpl(key, sequence_)) {
|
||||||
|
RecordTick(options_->statistics, NUMBER_FILTERED_DELETES);
|
||||||
|
return;
|
||||||
|
}
|
||||||
mem_->Add(sequence_, kTypeDeletion, key, Slice());
|
mem_->Add(sequence_, kTypeDeletion, key, Slice());
|
||||||
sequence_++;
|
sequence_++;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
Status WriteBatchInternal::InsertInto(const WriteBatch* b,
|
Status WriteBatchInternal::InsertInto(const WriteBatch* b, MemTable* mem,
|
||||||
MemTable* memtable) {
|
const Options* opts, DB* db,
|
||||||
MemTableInserter inserter;
|
const bool filter_deletes) {
|
||||||
inserter.sequence_ = WriteBatchInternal::Sequence(b);
|
MemTableInserter inserter(WriteBatchInternal::Sequence(b), mem, opts, db,
|
||||||
inserter.mem_ = memtable;
|
filter_deletes);
|
||||||
return b->Iterate(&inserter);
|
return b->Iterate(&inserter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
|
|
||||||
#include "leveldb/types.h"
|
#include "leveldb/types.h"
|
||||||
#include "leveldb/write_batch.h"
|
#include "leveldb/write_batch.h"
|
||||||
|
#include "leveldb/db.h"
|
||||||
|
#include "leveldb/options.h"
|
||||||
|
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
|
|
||||||
@ -39,7 +41,12 @@ class WriteBatchInternal {
|
|||||||
|
|
||||||
static void SetContents(WriteBatch* batch, const Slice& contents);
|
static void SetContents(WriteBatch* batch, const Slice& contents);
|
||||||
|
|
||||||
static Status InsertInto(const WriteBatch* batch, MemTable* memtable);
|
// Inserts batch entries into memtable
|
||||||
|
// Drops deletes in batch if filter_del is set to true and
|
||||||
|
// db->KeyMayExist returns false
|
||||||
|
static Status InsertInto(const WriteBatch* batch, MemTable* memtable,
|
||||||
|
const Options* opts = nullptr, DB* db = nullptr,
|
||||||
|
const bool filter_del = false);
|
||||||
|
|
||||||
static void Append(WriteBatch* dst, const WriteBatch* src);
|
static void Append(WriteBatch* dst, const WriteBatch* src);
|
||||||
};
|
};
|
||||||
|
@ -122,8 +122,12 @@ class DB {
|
|||||||
|
|
||||||
// If the key definitely does not exist in the database, then this method
|
// If the key definitely does not exist in the database, then this method
|
||||||
// returns false. Otherwise return true. This check is potentially
|
// returns false. Otherwise return true. This check is potentially
|
||||||
// lighter-weight than invoking DB::Get(). No IO is performed
|
// lighter-weight than invoking DB::Get(). One way to make this lighter weight
|
||||||
virtual bool KeyMayExist(const Slice& key) = 0;
|
// is to avoid doing any IOs
|
||||||
|
// Default implementation here returns true
|
||||||
|
virtual bool KeyMayExist(const Slice& key) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Return a heap-allocated iterator over the contents of the database.
|
// Return a heap-allocated iterator over the contents of the database.
|
||||||
// The result of NewIterator() is initially invalid (caller must
|
// The result of NewIterator() is initially invalid (caller must
|
||||||
|
@ -472,7 +472,7 @@ struct Options {
|
|||||||
// only incurs in-memory look up. This optimization avoids writing the delete
|
// only incurs in-memory look up. This optimization avoids writing the delete
|
||||||
// to storage when appropriate.
|
// to storage when appropriate.
|
||||||
// Default: false
|
// Default: false
|
||||||
bool deletes_check_filter_first;
|
bool filter_deletes;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -324,7 +324,7 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k,
|
|||||||
bool (*saver)(void*, const Slice&, const Slice&,
|
bool (*saver)(void*, const Slice&, const Slice&,
|
||||||
bool),
|
bool),
|
||||||
void (*mark_key_may_exist)(void*),
|
void (*mark_key_may_exist)(void*),
|
||||||
const bool no_IO) {
|
const bool no_io) {
|
||||||
Status s;
|
Status s;
|
||||||
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
|
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
|
||||||
bool done = false;
|
bool done = false;
|
||||||
@ -340,9 +340,9 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k,
|
|||||||
// cross one data block, we should be fine.
|
// cross one data block, we should be fine.
|
||||||
RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL);
|
RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL);
|
||||||
break;
|
break;
|
||||||
} else if (no_IO) {
|
} else if (no_io) {
|
||||||
// Update Saver.state to Found because we are only looking for whether
|
// Update Saver.state to Found because we are only looking for whether
|
||||||
// bloom-filter can guarantee the key is not there when "no_IO"
|
// bloom-filter can guarantee the key is not there when "no_io"
|
||||||
(*mark_key_may_exist)(arg);
|
(*mark_key_may_exist)(arg);
|
||||||
done = true;
|
done = true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -88,7 +88,7 @@ class Table {
|
|||||||
void* arg,
|
void* arg,
|
||||||
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
|
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
|
||||||
void (*mark_key_may_exist)(void*) = nullptr,
|
void (*mark_key_may_exist)(void*) = nullptr,
|
||||||
const bool no_IO = false);
|
const bool no_io = false);
|
||||||
|
|
||||||
|
|
||||||
void ReadMeta(const Footer& footer);
|
void ReadMeta(const Footer& footer);
|
||||||
|
@ -181,7 +181,7 @@ static uint32_t FLAGS_log2_keys_per_lock = 2; // implies 2^2 keys per lock
|
|||||||
static uint32_t FLAGS_purge_redundant_percent = 50;
|
static uint32_t FLAGS_purge_redundant_percent = 50;
|
||||||
|
|
||||||
// On true, deletes use bloom-filter and drop the delete if key not present
|
// On true, deletes use bloom-filter and drop the delete if key not present
|
||||||
static bool FLAGS_deletes_check_filter_first = false;
|
static bool FLAGS_filter_deletes = false;
|
||||||
|
|
||||||
// Level0 compaction start trigger
|
// Level0 compaction start trigger
|
||||||
static int FLAGS_level0_file_num_compaction_trigger = 0;
|
static int FLAGS_level0_file_num_compaction_trigger = 0;
|
||||||
@ -904,7 +904,7 @@ class StressTest {
|
|||||||
fprintf(stdout, "Purge redundant %% : %d\n",
|
fprintf(stdout, "Purge redundant %% : %d\n",
|
||||||
FLAGS_purge_redundant_percent);
|
FLAGS_purge_redundant_percent);
|
||||||
fprintf(stdout, "Deletes use filter : %d\n",
|
fprintf(stdout, "Deletes use filter : %d\n",
|
||||||
FLAGS_deletes_check_filter_first);
|
FLAGS_filter_deletes);
|
||||||
fprintf(stdout, "Num keys per lock : %d\n",
|
fprintf(stdout, "Num keys per lock : %d\n",
|
||||||
1 << FLAGS_log2_keys_per_lock);
|
1 << FLAGS_log2_keys_per_lock);
|
||||||
|
|
||||||
@ -960,7 +960,7 @@ class StressTest {
|
|||||||
options.delete_obsolete_files_period_micros =
|
options.delete_obsolete_files_period_micros =
|
||||||
FLAGS_delete_obsolete_files_period_micros;
|
FLAGS_delete_obsolete_files_period_micros;
|
||||||
options.max_manifest_file_size = 1024;
|
options.max_manifest_file_size = 1024;
|
||||||
options.deletes_check_filter_first = FLAGS_deletes_check_filter_first;
|
options.filter_deletes = FLAGS_filter_deletes;
|
||||||
static Random purge_percent(1000); // no benefit from non-determinism here
|
static Random purge_percent(1000); // no benefit from non-determinism here
|
||||||
if (purge_percent.Uniform(100) < FLAGS_purge_redundant_percent - 1) {
|
if (purge_percent.Uniform(100) < FLAGS_purge_redundant_percent - 1) {
|
||||||
options.purge_redundant_kvs_while_flush = false;
|
options.purge_redundant_kvs_while_flush = false;
|
||||||
@ -1160,9 +1160,9 @@ int main(int argc, char** argv) {
|
|||||||
} else if (sscanf(argv[i], "--purge_redundant_percent=%d%c", &n, &junk) == 1
|
} else if (sscanf(argv[i], "--purge_redundant_percent=%d%c", &n, &junk) == 1
|
||||||
&& (n >= 0 && n <= 100)) {
|
&& (n >= 0 && n <= 100)) {
|
||||||
FLAGS_purge_redundant_percent = n;
|
FLAGS_purge_redundant_percent = n;
|
||||||
} else if (sscanf(argv[i], "--deletes_check_filter_first=%d%c", &n, &junk)
|
} else if (sscanf(argv[i], "--filter_deletes=%d%c", &n, &junk)
|
||||||
== 1 && (n == 0 || n == 1)) {
|
== 1 && (n == 0 || n == 1)) {
|
||||||
FLAGS_deletes_check_filter_first = n;
|
FLAGS_filter_deletes = n;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
||||||
exit(1);
|
exit(1);
|
||||||
|
@ -75,7 +75,7 @@ Options::Options()
|
|||||||
access_hint_on_compaction_start(NORMAL),
|
access_hint_on_compaction_start(NORMAL),
|
||||||
use_adaptive_mutex(false),
|
use_adaptive_mutex(false),
|
||||||
bytes_per_sync(0),
|
bytes_per_sync(0),
|
||||||
deletes_check_filter_first(false) {
|
filter_deletes(false) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char* const access_hints[] = {
|
static const char* const access_hints[] = {
|
||||||
@ -209,8 +209,8 @@ Options::Dump(Logger* log) const
|
|||||||
use_adaptive_mutex);
|
use_adaptive_mutex);
|
||||||
Log(log," Options.bytes_per_sync: %ld",
|
Log(log," Options.bytes_per_sync: %ld",
|
||||||
bytes_per_sync);
|
bytes_per_sync);
|
||||||
Log(log," Options.deletes_check_filter_first: %d",
|
Log(log," Options.filter_deletes: %d",
|
||||||
deletes_check_filter_first);
|
filter_deletes);
|
||||||
} // Options::Dump
|
} // Options::Dump
|
||||||
|
|
||||||
//
|
//
|
||||||
|
Loading…
Reference in New Issue
Block a user