Expand KeyMayExist to return the proper value if it can be found in memory and also check block_cache

Summary: Removed KeyMayExistImpl because KeyMayExist demanded Get like semantics now. Removed no_io from memtable and imm because we need the proper value now and shouldn't just stop when we see Merge in memtable. Added checks to block_cache. Updated documentation and unit-test

Test Plan: make all check;db_stress for 1 hour

Reviewers: dhruba, haobo

Reviewed By: dhruba

CC: leveldb

Differential Revision: https://reviews.facebook.net/D11853
This commit is contained in:
Mayank Agarwal 2013-07-26 12:57:01 -07:00
parent 9700677a2b
commit 59d0b02f8b
17 changed files with 126 additions and 91 deletions

View File

@ -2093,13 +2093,11 @@ Status DBImpl::Get(const ReadOptions& options,
return GetImpl(options, key, value);
}
// If no_io is true, then returns Status::NotFound if key is not in memtable,
// immutable-memtable and bloom-filters can guarantee that key is not in db,
// "value" is garbage string if no_io is true
Status DBImpl::GetImpl(const ReadOptions& options,
const Slice& key,
std::string* value,
const bool no_io) {
const bool no_io,
bool* value_found) {
Status s;
StopWatch sw(env_, options_.statistics, DB_GET);
@ -2128,12 +2126,12 @@ Status DBImpl::GetImpl(const ReadOptions& options,
// s is both in/out. When in, s could either be OK or MergeInProgress.
// value will contain the current merge operand in the latter case.
LookupKey lkey(key, snapshot);
if (mem->Get(lkey, value, &s, options_, no_io)) {
if (mem->Get(lkey, value, &s, options_)) {
// Done
} else if (imm.Get(lkey, value, &s, options_, no_io)) {
} else if (imm.Get(lkey, value, &s, options_)) {
// Done
} else {
current->Get(options, lkey, value, &s, &stats, options_, no_io);
current->Get(options, lkey, value, &s, &stats, options_, no_io,value_found);
have_stat_update = true;
}
mutex_.Lock();
@ -2223,19 +2221,14 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
return statList;
}
bool DBImpl::KeyMayExist(const Slice& key) {
return KeyMayExistImpl(key, versions_->LastSequence());
}
bool DBImpl::KeyMayExistImpl(const Slice& key,
const SequenceNumber read_from_seq) {
std::string value;
SnapshotImpl read_from_snapshot;
read_from_snapshot.number_ = read_from_seq;
ReadOptions ropts;
ropts.snapshot = &read_from_snapshot;
const Status s = GetImpl(ropts, key, &value, true);
return !s.IsNotFound();
bool DBImpl::KeyMayExist(const ReadOptions& options,
const Slice& key,
std::string* value,
bool* value_found) {
if (value_found != nullptr) {
*value_found = true; // falsify later if key-may-exist but can't fetch value
}
return GetImpl(options, key, value, true, value_found).ok();
}
Iterator* DBImpl::NewIterator(const ReadOptions& options) {

View File

@ -50,9 +50,14 @@ class DBImpl : public DB {
const std::vector<Slice>& keys,
std::vector<std::string>* values);
// Returns false if key can't exist- based on memtable, immutable-memtable and
// bloom-filters; true otherwise. No IO is performed
virtual bool KeyMayExist(const Slice& key);
// Returns false if key doesn't exist in the database and true if it may.
// If value_found is not passed in as null, then return the value if found in
// memory. On return, if value was found, then value_found will be set to true
// , otherwise false.
virtual bool KeyMayExist(const ReadOptions& options,
const Slice& key,
std::string* value,
bool* value_found = nullptr);
virtual Iterator* NewIterator(const ReadOptions&);
virtual const Snapshot* GetSnapshot();
virtual void ReleaseSnapshot(const Snapshot* snapshot);
@ -104,15 +109,6 @@ class DBImpl : public DB {
// Trigger's a background call for testing.
void TEST_PurgeObsoleteteWAL();
// KeyMayExist's internal function, but can be called internally from rocksdb
// to check memtable from sequence_number=read_from_seq. This is useful to
// check presence of key in db when key's existence is to be also checked in
// an incompletely written WriteBatch in memtable. eg. Database doesn't have
// key A and WriteBatch=[PutA,B; DelA]. A KeyMayExist called from DelA also
// needs to check itself for any PutA to be sure to not drop the delete.
bool KeyMayExistImpl(const Slice& key,
const SequenceNumber read_from_seq);
protected:
Env* const env_;
const std::string dbname_;
@ -415,11 +411,13 @@ class DBImpl : public DB {
std::vector<SequenceNumber>& snapshots,
SequenceNumber* prev_snapshot);
// Function that Get and KeyMayExistImpl call with no_io true or false
// Function that Get and KeyMayExist call with no_io true or false
// Note: 'value_found' from KeyMayExist propagates here
Status GetImpl(const ReadOptions& options,
const Slice& key,
std::string* value,
const bool no_io = false);
const bool no_io = false,
bool* value_found = nullptr);
};
// Sanitize db options. The caller should delete result.info_log if

View File

@ -772,34 +772,41 @@ TEST(DBTest, GetEncountersEmptyLevel) {
} while (ChangeOptions());
}
// KeyMayExist-API returns false if memtable(s) and in-memory bloom-filters can
// guarantee that the key doesn't exist in the db, else true. This can lead to
// a few false positives, but not false negatives. To make test deterministic,
// use a much larger number of bits per key-20 than bits in the key, so
// that false positives are eliminated
// KeyMayExist can lead to a few false positives, but not false negatives.
// To make test deterministic, use a much larger number of bits per key-20 than
// bits in the key, so that false positives are eliminated
TEST(DBTest, KeyMayExist) {
do {
ReadOptions ropts;
std::string value;
Options options = CurrentOptions();
options.filter_policy = NewBloomFilterPolicy(20);
Reopen(&options);
ASSERT_TRUE(!db_->KeyMayExist("a"));
ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value));
ASSERT_OK(db_->Put(WriteOptions(), "a", "b"));
ASSERT_TRUE(db_->KeyMayExist("a"));
bool value_found = false;
ASSERT_TRUE(db_->KeyMayExist(ropts, "a", &value, &value_found));
ASSERT_TRUE(value_found);
ASSERT_EQ("b", value);
dbfull()->Flush(FlushOptions());
ASSERT_TRUE(db_->KeyMayExist("a"));
value.clear();
value_found = false;
ASSERT_TRUE(db_->KeyMayExist(ropts, "a", &value, &value_found));
ASSERT_TRUE(value_found);
ASSERT_EQ("b", value);
ASSERT_OK(db_->Delete(WriteOptions(), "a"));
ASSERT_TRUE(!db_->KeyMayExist("a"));
ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value));
dbfull()->Flush(FlushOptions());
dbfull()->CompactRange(nullptr, nullptr);
ASSERT_TRUE(!db_->KeyMayExist("a"));
ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value));
ASSERT_OK(db_->Delete(WriteOptions(), "c"));
ASSERT_TRUE(!db_->KeyMayExist("c"));
ASSERT_TRUE(!db_->KeyMayExist(ropts, "c", &value));
delete options.filter_policy;
} while (ChangeOptions());
@ -3045,7 +3052,13 @@ class ModelDB: public DB {
Status::NotSupported("Not implemented."));
return s;
}
virtual bool KeyMayExist(const Slice& key) {
virtual bool KeyMayExist(const ReadOptions& options,
const Slice& key,
std::string* value,
bool* value_found = nullptr) {
if (value_found != nullptr) {
*value_found = false;
}
return true; // Not Supported directly
}
virtual Iterator* NewIterator(const ReadOptions& options) {

View File

@ -130,7 +130,7 @@ void MemTable::Add(SequenceNumber s, ValueType type,
}
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
const Options& options, const bool check_presence_only) {
const Options& options) {
Slice memkey = key.memtable_key();
std::shared_ptr<MemTableRep::Iterator> iter(table_.get()->GetIterator());
iter->Seek(memkey.data());
@ -174,10 +174,6 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
return true;
}
case kTypeMerge: {
if (check_presence_only) {
*s = Status::OK();
return true;
}
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
if (merge_in_progress) {
merge_operator->Merge(key.user_key(), &v, operand,

View File

@ -73,13 +73,12 @@ class MemTable {
// If memtable contains a deletion for key, store a NotFound() error
// in *status and return true.
// If memtable contains Merge operation as the most recent entry for a key,
// and if check_presence_only is set, return true with Status::OK,
// else if the merge process does not stop (not reaching a value or delete),
// and the merge process does not stop (not reaching a value or delete),
// store the current merged result in value and MergeInProgress in s.
// return false
// Else, return false.
bool Get(const LookupKey& key, std::string* value, Status* s,
const Options& options, const bool check_presence_only = false);
const Options& options);
// Returns the edits area that is needed for flushing the memtable
VersionEdit* GetEdits() { return &edit_; }

View File

@ -194,10 +194,10 @@ size_t MemTableList::ApproximateMemoryUsage() {
// Search all the memtables starting from the most recent one.
// Return the most recent value found, if any.
bool MemTableList::Get(const LookupKey& key, std::string* value, Status* s,
const Options& options, const bool check_presence_only) {
const Options& options) {
for (list<MemTable*>::iterator it = memlist_.begin();
it != memlist_.end(); ++it) {
if ((*it)->Get(key, value, s, options, check_presence_only)) {
if ((*it)->Get(key, value, s, options)) {
return true;
}
}

View File

@ -70,7 +70,7 @@ class MemTableList {
// Search all the memtables starting from the most recent one.
// Return the most recent value found, if any.
bool Get(const LookupKey& key, std::string* value, Status* s,
const Options& options, const bool check_presence_only = false);
const Options& options);
// Returns the list of underlying memtables.
void GetMemTables(std::vector<MemTable*>* list);

View File

@ -238,6 +238,7 @@ struct Saver {
SaverState state;
const Comparator* ucmp;
Slice user_key;
bool* value_found; // Is value set correctly? Used by KeyMayExist
std::string* value;
const MergeOperator* merge_operator;
Logger* logger;
@ -245,13 +246,17 @@ struct Saver {
};
}
// Called from TableCache::Get when bloom-filters can't guarantee that key does
// not exist and Get is not permitted to do IO to read the data-block and be
// certain.
// Set the key as Found and let the caller know that key-may-exist
// Called from TableCache::Get and InternalGet when file/block in which key may
// exist are not there in TableCache/BlockCache respectively. In this case we
// can't guarantee that key does not exist and are not permitted to do IO to be
// certain.Set the status=kFound and value_found=false to let the caller know
// that key may exist but is not there in memory
static void MarkKeyMayExist(void* arg) {
Saver* s = reinterpret_cast<Saver*>(arg);
s->state = kFound;
if (s->value_found != nullptr) {
*(s->value_found) = false;
}
}
static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){
@ -339,7 +344,8 @@ void Version::Get(const ReadOptions& options,
Status *status,
GetStats* stats,
const Options& db_options,
const bool no_io) {
const bool no_io,
bool* value_found) {
Slice ikey = k.internal_key();
Slice user_key = k.user_key();
const Comparator* ucmp = vset_->icmp_.user_comparator();
@ -355,6 +361,7 @@ void Version::Get(const ReadOptions& options,
saver.state = status->ok()? kNotFound : kMerge;
saver.ucmp = ucmp;
saver.user_key = user_key;
saver.value_found = value_found;
saver.value = value;
saver.merge_operator = merge_operator;
saver.logger = logger.get();

View File

@ -75,7 +75,7 @@ class Version {
};
void Get(const ReadOptions&, const LookupKey& key, std::string* val,
Status* status, GetStats* stats, const Options& db_option,
const bool no_io = false);
const bool no_io = false, bool* value_found = nullptr);
// Adds "stats" into the current state. Returns true if a new
// compaction may need to be triggered, false otherwise.

View File

@ -16,10 +16,12 @@
#include "leveldb/write_batch.h"
#include "leveldb/options.h"
#include "leveldb/statistics.h"
#include "db/dbformat.h"
#include "db/db_impl.h"
#include "db/memtable.h"
#include "db/snapshot.h"
#include "db/write_batch_internal.h"
#include "util/coding.h"
#include <stdexcept>
@ -167,10 +169,17 @@ class MemTableInserter : public WriteBatch::Handler {
sequence_++;
}
virtual void Delete(const Slice& key) {
if (filter_deletes_ && !db_->KeyMayExistImpl(key, sequence_)) {
if (filter_deletes_) {
SnapshotImpl read_from_snapshot;
read_from_snapshot.number_ = sequence_;
ReadOptions ropts;
ropts.snapshot = &read_from_snapshot;
std::string value;
if (!db_->KeyMayExist(ropts, key, &value)) {
RecordTick(options_->statistics, NUMBER_FILTERED_DELETES);
return;
}
}
mem_->Add(sequence_, kTypeDeletion, key, Slice());
sequence_++;
}

View File

@ -104,7 +104,8 @@ class DB {
//
// May return some other Status on an error.
virtual Status Get(const ReadOptions& options,
const Slice& key, std::string* value) = 0;
const Slice& key,
std::string* value) = 0;
// If keys[i] does not exist in the database, then the i'th returned
// status will be one for which Status::IsNotFound() is true, and
@ -121,11 +122,19 @@ class DB {
std::vector<std::string>* values) = 0;
// If the key definitely does not exist in the database, then this method
// returns false. Otherwise return true. This check is potentially
// lighter-weight than invoking DB::Get(). One way to make this lighter weight
// is to avoid doing any IOs
// Default implementation here returns true
virtual bool KeyMayExist(const Slice& key) {
// returns false, else true. If the caller wants to obtain value when the key
// is found in memory, a bool for 'value_found' must be passed. 'value_found'
// will be true on return if value has been set properly.
// This check is potentially lighter-weight than invoking DB::Get(). One way
// to make this lighter weight is to avoid doing any IOs.
// Default implementation here returns true and sets 'value_found' to false
virtual bool KeyMayExist(const ReadOptions& options,
const Slice& key,
std::string* value,
bool* value_found = nullptr) {
if (value_found != nullptr) {
*value_found = false;
}
return true;
}

View File

@ -473,12 +473,10 @@ struct Options {
// Default: 0
uint64_t bytes_per_sync;
// Use bloom-filter for deletes when this is true.
// db->Delete first calls KeyMayExist which checks memtable,immutable-memtable
// and bloom-filters to determine if the key does not exist in the database.
// If the key definitely does not exist, then the delete is a noop.KeyMayExist
// only incurs in-memory look up. This optimization avoids writing the delete
// to storage when appropriate.
// Use KeyMayExist API to filter deletes when this is true.
// If KeyMayExist returns false, i.e. the key definitely does not exist, then
// the delete is a noop. KeyMayExist only incurs in-memory look up.
// This optimization avoids writing the delete to storage when appropriate.
// Default: false
bool filter_deletes;

View File

@ -235,7 +235,8 @@ Iterator* Table::BlockReader(void* arg,
const ReadOptions& options,
const Slice& index_value,
bool* didIO,
bool for_compaction) {
bool for_compaction,
const bool no_io) {
Table* table = reinterpret_cast<Table*>(arg);
Cache* block_cache = table->rep_->options.block_cache.get();
std::shared_ptr<Statistics> statistics = table->rep_->options.statistics;
@ -264,6 +265,8 @@ Iterator* Table::BlockReader(void* arg,
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
RecordTick(statistics, BLOCK_CACHE_HIT);
} else if (no_io) {
return nullptr; // Did not find in block_cache and can't do IO
} else {
Histograms histogram = for_compaction ?
READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS;
@ -286,7 +289,9 @@ Iterator* Table::BlockReader(void* arg,
RecordTick(statistics, BLOCK_CACHE_MISS);
}
} else {
} else if (no_io) {
return nullptr; // Could not read from block_cache and can't do IO
}else {
s = ReadBlock(table->rep_->file.get(), options, handle, &block, didIO);
}
}
@ -340,16 +345,17 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k,
// cross one data block, we should be fine.
RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL);
break;
} else if (no_io) {
// Update Saver.state to Found because we are only looking for whether
// bloom-filter can guarantee the key is not there when "no_io"
(*mark_key_may_exist)(arg);
done = true;
} else {
bool didIO = false;
Iterator* block_iter = BlockReader(this, options, iiter->value(),
&didIO);
&didIO, no_io);
if (no_io && !block_iter) { // couldn't get block from block_cache
// Update Saver.state to Found because we are only looking for whether
// we can guarantee the key is not there when "no_io" is set
(*mark_key_may_exist)(arg);
break;
}
for (block_iter->Seek(k); block_iter->Valid(); block_iter->Next()) {
if (!(*saver)(arg, block_iter->key(), block_iter->value(), didIO)) {
done = true;

View File

@ -77,7 +77,8 @@ class Table {
const EnvOptions& soptions, const Slice&,
bool for_compaction);
static Iterator* BlockReader(void*, const ReadOptions&, const Slice&,
bool* didIO, bool for_compaction = false);
bool* didIO, bool for_compaction = false,
const bool no_io = false);
// Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found
// after a call to Seek(key), until handle_result returns false.

View File

@ -180,7 +180,7 @@ static uint32_t FLAGS_log2_keys_per_lock = 2; // implies 2^2 keys per lock
// Percentage of times we want to purge redundant keys in memory before flushing
static uint32_t FLAGS_purge_redundant_percent = 50;
// On true, deletes use bloom-filter and drop the delete if key not present
// On true, deletes use KeyMayExist to drop the delete if key not present
static bool FLAGS_filter_deletes = false;
// Level0 compaction start trigger

View File

@ -158,8 +158,11 @@ std::vector<Status> DBWithTTL::MultiGet(const ReadOptions& options,
supported with TTL"));
}
bool DBWithTTL::KeyMayExist(const Slice& key) {
return db_->KeyMayExist(key);
bool DBWithTTL::KeyMayExist(ReadOptions& options,
const Slice& key,
std::string* value,
bool* value_found) {
return db_->KeyMayExist(options, key, value, value_found);
}
Status DBWithTTL::Delete(const WriteOptions& wopts, const Slice& key) {

View File

@ -33,7 +33,10 @@ class DBWithTTL : public DB, CompactionFilter {
const std::vector<Slice>& keys,
std::vector<std::string>* values);
virtual bool KeyMayExist(const Slice& key);
virtual bool KeyMayExist(ReadOptions& options,
const Slice& key,
std::string* value,
bool* value_found = nullptr);
virtual Status Delete(const WriteOptions& wopts, const Slice& key);