Merge branch 'master' into performance
Conflicts: db/version_set.cc include/leveldb/options.h util/options.cc
This commit is contained in:
commit
4a745a5666
4
Makefile
4
Makefile
@ -90,8 +90,8 @@ SHARED3 = $(SHARED1)
|
||||
SHARED = $(SHARED1)
|
||||
else
|
||||
# Update db.h if you change these.
|
||||
SHARED_MAJOR = 1
|
||||
SHARED_MINOR = 5
|
||||
SHARED_MAJOR = 2
|
||||
SHARED_MINOR = 0
|
||||
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
|
||||
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
|
||||
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
|
||||
|
@ -302,18 +302,12 @@ static uint64_t FLAGS_WAL_ttl_seconds = 0;
|
||||
// Allow buffered io using OS buffers
|
||||
static bool FLAGS_use_os_buffer;
|
||||
|
||||
// Allow filesystem to do read-aheads
|
||||
static bool FLAGS_use_fsreadahead;
|
||||
|
||||
// Allow reads to occur via mmap-ing files
|
||||
static bool FLAGS_use_mmap_reads;
|
||||
|
||||
// Allow writes to occur via mmap-ing files
|
||||
static bool FLAGS_use_mmap_writes;
|
||||
|
||||
// Allow readaheads to occur for compactions
|
||||
static bool FLAGS_use_readahead_compactions;
|
||||
|
||||
// Advise random access on table file open
|
||||
static bool FLAGS_advise_random_on_open =
|
||||
leveldb::Options().advise_random_on_open;
|
||||
@ -343,6 +337,9 @@ static auto FLAGS_use_adaptive_mutex =
|
||||
static auto FLAGS_bytes_per_sync =
|
||||
leveldb::Options().bytes_per_sync;
|
||||
|
||||
// On true, deletes use bloom-filter and drop the delete if key not present
|
||||
static bool FLAGS_deletes_check_filter_first = false;
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
// Helper for quickly generating random data.
|
||||
@ -1131,6 +1128,7 @@ unique_ptr<char []> GenerateKeyFromInt(int v, const char* suffix = "")
|
||||
options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base;
|
||||
options.max_bytes_for_level_multiplier =
|
||||
FLAGS_max_bytes_for_level_multiplier;
|
||||
options.deletes_check_filter_first = FLAGS_deletes_check_filter_first;
|
||||
if (FLAGS_max_bytes_for_level_multiplier_additional.size() > 0) {
|
||||
if (FLAGS_max_bytes_for_level_multiplier_additional.size() !=
|
||||
(unsigned int)FLAGS_num_levels) {
|
||||
@ -1172,7 +1170,6 @@ unique_ptr<char []> GenerateKeyFromInt(int v, const char* suffix = "")
|
||||
|
||||
// fill storage options
|
||||
options.allow_os_buffer = FLAGS_use_os_buffer;
|
||||
options.allow_readahead = FLAGS_use_fsreadahead;
|
||||
options.allow_mmap_reads = FLAGS_use_mmap_reads;
|
||||
options.allow_mmap_writes = FLAGS_use_mmap_writes;
|
||||
options.advise_random_on_open = FLAGS_advise_random_on_open;
|
||||
@ -2106,12 +2103,6 @@ int main(int argc, char** argv) {
|
||||
} else if (sscanf(argv[i], "--mmap_write=%d%c", &n, &junk) == 1 &&
|
||||
(n == 0 || n == 1)) {
|
||||
FLAGS_use_mmap_writes = n;
|
||||
} else if (sscanf(argv[i], "--readahead=%d%c", &n, &junk) == 1 &&
|
||||
(n == 0 || n == 1)) {
|
||||
FLAGS_use_fsreadahead = n;
|
||||
} else if (sscanf(argv[i], "--readahead_compactions=%d%c", &n, &junk) == 1&&
|
||||
(n == 0 || n == 1)) {
|
||||
FLAGS_use_readahead_compactions = n;
|
||||
} else if (sscanf(argv[i], "--statistics=%d%c", &n, &junk) == 1 &&
|
||||
(n == 0 || n == 1)) {
|
||||
if (n == 1) {
|
||||
@ -2255,6 +2246,9 @@ int main(int argc, char** argv) {
|
||||
FLAGS_keys_per_multiget = n;
|
||||
} else if (sscanf(argv[i], "--bytes_per_sync=%ld%c", &l, &junk) == 1) {
|
||||
FLAGS_bytes_per_sync = l;
|
||||
} else if (sscanf(argv[i], "--deletes_check_filter_first=%d%c", &n, &junk)
|
||||
== 1 && (n == 0 || n ==1 )) {
|
||||
FLAGS_deletes_check_filter_first = n;
|
||||
} else {
|
||||
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
||||
exit(1);
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
|
||||
@ -2015,6 +2016,16 @@ int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
|
||||
Status DBImpl::Get(const ReadOptions& options,
|
||||
const Slice& key,
|
||||
std::string* value) {
|
||||
return GetImpl(options, key, value);
|
||||
}
|
||||
|
||||
// If no_IO is true, then returns Status::NotFound if key is not in memtable,
|
||||
// immutable-memtable and bloom-filters can guarantee that key is not in db,
|
||||
// "value" is garbage string if no_IO is true
|
||||
Status DBImpl::GetImpl(const ReadOptions& options,
|
||||
const Slice& key,
|
||||
std::string* value,
|
||||
const bool no_IO) {
|
||||
Status s;
|
||||
|
||||
StopWatch sw(env_, options_.statistics, DB_GET);
|
||||
@ -2043,12 +2054,12 @@ Status DBImpl::Get(const ReadOptions& options,
|
||||
// s is both in/out. When in, s could either be OK or MergeInProgress.
|
||||
// value will contain the current merge operand in the latter case.
|
||||
LookupKey lkey(key, snapshot);
|
||||
if (mem->Get(lkey, value, &s, options_)) {
|
||||
if (mem->Get(lkey, value, &s, options_, no_IO)) {
|
||||
// Done
|
||||
} else if (imm.Get(lkey, value, &s, options_)) {
|
||||
} else if (imm.Get(lkey, value, &s, options_, no_IO)) {
|
||||
// Done
|
||||
} else {
|
||||
current->Get(options, lkey, value, &s, &stats, options_);
|
||||
current->Get(options, lkey, value, &s, &stats, options_, no_IO);
|
||||
have_stat_update = true;
|
||||
}
|
||||
mutex_.Lock();
|
||||
@ -2138,6 +2149,12 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
|
||||
return statList;
|
||||
}
|
||||
|
||||
bool DBImpl::KeyMayExist(const Slice& key) {
|
||||
std::string value;
|
||||
const Status s = GetImpl(ReadOptions(), key, &value, true);
|
||||
return !s.IsNotFound();
|
||||
}
|
||||
|
||||
Iterator* DBImpl::NewIterator(const ReadOptions& options) {
|
||||
SequenceNumber latest_snapshot;
|
||||
Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
|
||||
@ -2173,6 +2190,10 @@ Status DBImpl::Merge(const WriteOptions& o, const Slice& key,
|
||||
}
|
||||
|
||||
Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
||||
if (options_.deletes_check_filter_first && !KeyMayExist(key)) {
|
||||
RecordTick(options_.statistics, NUMBER_FILTERED_DELETES);
|
||||
return Status::OK();
|
||||
}
|
||||
return DB::Delete(options, key);
|
||||
}
|
||||
|
||||
@ -2232,13 +2253,19 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
}
|
||||
if (status.ok()) {
|
||||
status = WriteBatchInternal::InsertInto(updates, mem_);
|
||||
if (!status.ok()) {
|
||||
// Panic for in-memory corruptions
|
||||
// Note that existing logic was not sound. Any partial failure writing
|
||||
// into the memtable would result in a state that some write ops might
|
||||
// have succeeded in memtable but Status reports error for all writes.
|
||||
throw std::runtime_error("In memory WriteBatch corruption!");
|
||||
}
|
||||
versions_->SetLastSequence(last_sequence);
|
||||
last_flushed_sequence_ = current_sequence;
|
||||
}
|
||||
mutex_.Lock();
|
||||
}
|
||||
last_flushed_sequence_ = current_sequence;
|
||||
if (updates == &tmp_batch_) tmp_batch_.Clear();
|
||||
|
||||
versions_->SetLastSequence(last_sequence);
|
||||
}
|
||||
|
||||
while (true) {
|
||||
|
10
db/db_impl.h
10
db/db_impl.h
@ -48,6 +48,10 @@ class DBImpl : public DB {
|
||||
virtual std::vector<Status> MultiGet(const ReadOptions& options,
|
||||
const std::vector<Slice>& keys,
|
||||
std::vector<std::string>* values);
|
||||
|
||||
// Returns false if key can't exist- based on memtable, immutable-memtable and
|
||||
// bloom-filters; true otherwise. No IO is performed
|
||||
virtual bool KeyMayExist(const Slice& key);
|
||||
virtual Iterator* NewIterator(const ReadOptions&);
|
||||
virtual const Snapshot* GetSnapshot();
|
||||
virtual void ReleaseSnapshot(const Snapshot* snapshot);
|
||||
@ -379,6 +383,12 @@ class DBImpl : public DB {
|
||||
SequenceNumber in,
|
||||
std::vector<SequenceNumber>& snapshots,
|
||||
SequenceNumber* prev_snapshot);
|
||||
|
||||
// Function that Get and KeyMayExist call with no_IO true or false
|
||||
Status GetImpl(const ReadOptions& options,
|
||||
const Slice& key,
|
||||
std::string* value,
|
||||
const bool no_IO = false);
|
||||
};
|
||||
|
||||
// Sanitize db options. The caller should delete result.info_log if
|
||||
|
@ -218,6 +218,7 @@ class DBTest {
|
||||
kManifestFileSize,
|
||||
kCompactOnFlush,
|
||||
kPerfOptions,
|
||||
kDeletesFilterFirst,
|
||||
kEnd
|
||||
};
|
||||
int option_config_;
|
||||
@ -289,6 +290,9 @@ class DBTest {
|
||||
options.rate_limit_delay_milliseconds = 2;
|
||||
// TODO -- test more options
|
||||
break;
|
||||
case kDeletesFilterFirst:
|
||||
options.deletes_check_filter_first = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -768,6 +772,39 @@ TEST(DBTest, GetEncountersEmptyLevel) {
|
||||
} while (ChangeOptions());
|
||||
}
|
||||
|
||||
// KeyMayExist-API returns false if memtable(s) and in-memory bloom-filters can
|
||||
// guarantee that the key doesn't exist in the db, else true. This can lead to
|
||||
// a few false positives, but not false negatives. To make test deterministic,
|
||||
// use a much larger number of bits per key-20 than bits in the key, so
|
||||
// that false positives are eliminated
|
||||
TEST(DBTest, KeyMayExist) {
|
||||
do {
|
||||
Options options = CurrentOptions();
|
||||
options.filter_policy = NewBloomFilterPolicy(20);
|
||||
Reopen(&options);
|
||||
|
||||
ASSERT_TRUE(!db_->KeyMayExist("a"));
|
||||
|
||||
ASSERT_OK(db_->Put(WriteOptions(), "a", "b"));
|
||||
ASSERT_TRUE(db_->KeyMayExist("a"));
|
||||
|
||||
dbfull()->Flush(FlushOptions());
|
||||
ASSERT_TRUE(db_->KeyMayExist("a"));
|
||||
|
||||
ASSERT_OK(db_->Delete(WriteOptions(), "a"));
|
||||
ASSERT_TRUE(!db_->KeyMayExist("a"));
|
||||
|
||||
dbfull()->Flush(FlushOptions());
|
||||
dbfull()->CompactRange(nullptr, nullptr);
|
||||
ASSERT_TRUE(!db_->KeyMayExist("a"));
|
||||
|
||||
ASSERT_OK(db_->Delete(WriteOptions(), "c"));
|
||||
ASSERT_TRUE(!db_->KeyMayExist("c"));
|
||||
|
||||
delete options.filter_policy;
|
||||
} while (ChangeOptions());
|
||||
}
|
||||
|
||||
TEST(DBTest, IterEmpty) {
|
||||
Iterator* iter = db_->NewIterator(ReadOptions());
|
||||
|
||||
@ -1403,7 +1440,7 @@ class DeleteFilter : public CompactionFilter {
|
||||
|
||||
class ChangeFilter : public CompactionFilter {
|
||||
public:
|
||||
ChangeFilter(int argv) : argv_(argv) {}
|
||||
explicit ChangeFilter(int argv) : argv_(argv) {}
|
||||
|
||||
virtual bool Filter(int level, const Slice& key,
|
||||
const Slice& value, std::string* new_value,
|
||||
@ -2970,6 +3007,9 @@ class ModelDB: public DB {
|
||||
Status::NotSupported("Not implemented."));
|
||||
return s;
|
||||
}
|
||||
virtual bool KeyMayExist(const Slice& key) {
|
||||
return true; // Not Supported directly
|
||||
}
|
||||
virtual Iterator* NewIterator(const ReadOptions& options) {
|
||||
if (options.snapshot == nullptr) {
|
||||
KVMap* saved = new KVMap;
|
||||
|
@ -119,7 +119,7 @@ void MemTable::Add(SequenceNumber s, ValueType type,
|
||||
}
|
||||
|
||||
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
||||
const Options& options) {
|
||||
const Options& options, const bool check_presence_only) {
|
||||
Slice memkey = key.memtable_key();
|
||||
Table::Iterator iter(&table_);
|
||||
iter.Seek(memkey.data());
|
||||
@ -164,6 +164,10 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
||||
return true;
|
||||
}
|
||||
case kTypeMerge: {
|
||||
if (check_presence_only) {
|
||||
*s = Status::OK();
|
||||
return true;
|
||||
}
|
||||
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
|
||||
if (merge_in_progress) {
|
||||
merge_operator->Merge(key.user_key(), &v, operand,
|
||||
|
@ -63,12 +63,13 @@ class MemTable {
|
||||
// If memtable contains a deletion for key, store a NotFound() error
|
||||
// in *status and return true.
|
||||
// If memtable contains Merge operation as the most recent entry for a key,
|
||||
// and the merge process does not stop (not reaching a value or delete),
|
||||
// and if check_presence_only is set, return true with Status::OK,
|
||||
// else if the merge process does not stop (not reaching a value or delete),
|
||||
// store the current merged result in value and MergeInProgress in s.
|
||||
// return false
|
||||
// Else, return false.
|
||||
bool Get(const LookupKey& key, std::string* value, Status* s,
|
||||
const Options& options);
|
||||
const Options& options, const bool check_presence_only = false);
|
||||
|
||||
// Returns the edits area that is needed for flushing the memtable
|
||||
VersionEdit* GetEdits() { return &edit_; }
|
||||
|
@ -194,10 +194,10 @@ size_t MemTableList::ApproximateMemoryUsage() {
|
||||
// Search all the memtables starting from the most recent one.
|
||||
// Return the most recent value found, if any.
|
||||
bool MemTableList::Get(const LookupKey& key, std::string* value, Status* s,
|
||||
const Options& options ) {
|
||||
const Options& options, const bool check_presence_only) {
|
||||
for (list<MemTable*>::iterator it = memlist_.begin();
|
||||
it != memlist_.end(); ++it) {
|
||||
if ((*it)->Get(key, value, s, options)) {
|
||||
if ((*it)->Get(key, value, s, options, check_presence_only)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -71,7 +71,7 @@ class MemTableList {
|
||||
// Search all the memtables starting from the most recent one.
|
||||
// Return the most recent value found, if any.
|
||||
bool Get(const LookupKey& key, std::string* value, Status* s,
|
||||
const Options& options);
|
||||
const Options& options, const bool check_presence_only = false);
|
||||
|
||||
// Returns the list of underlying memtables.
|
||||
void GetMemTables(std::vector<MemTable*>* list);
|
||||
|
@ -112,14 +112,16 @@ Status TableCache::Get(const ReadOptions& options,
|
||||
const Slice& k,
|
||||
void* arg,
|
||||
bool (*saver)(void*, const Slice&, const Slice&, bool),
|
||||
bool* tableIO) {
|
||||
bool* tableIO,
|
||||
void (*mark_key_may_exist)(void*),
|
||||
const bool no_IO) {
|
||||
Cache::Handle* handle = nullptr;
|
||||
Status s = FindTable(storage_options_, file_number, file_size,
|
||||
&handle, tableIO);
|
||||
if (s.ok()) {
|
||||
Table* t =
|
||||
reinterpret_cast<Table*>(cache_->Value(handle));
|
||||
s = t->InternalGet(options, k, arg, saver);
|
||||
s = t->InternalGet(options, k, arg, saver, mark_key_may_exist, no_IO);
|
||||
cache_->Release(handle);
|
||||
}
|
||||
return s;
|
||||
|
@ -48,7 +48,9 @@ class TableCache {
|
||||
const Slice& k,
|
||||
void* arg,
|
||||
bool (*handle_result)(void*, const Slice&, const Slice&, bool),
|
||||
bool* tableIO);
|
||||
bool* tableIO,
|
||||
void (*mark_key_may_exist)(void*) = nullptr,
|
||||
const bool no_IO = false);
|
||||
|
||||
// Evict any entry for the specified file number
|
||||
void Evict(uint64_t file_number);
|
||||
|
@ -245,6 +245,16 @@ struct Saver {
|
||||
bool didIO; // did we do any disk io?
|
||||
};
|
||||
}
|
||||
|
||||
// Called from TableCache::Get when bloom-filters can't guarantee that key does
|
||||
// not exist and Get is not permitted to do IO to read the data-block and be
|
||||
// certain.
|
||||
// Set the key as Found and let the caller know that key-may-exist
|
||||
static void MarkKeyMayExist(void* arg) {
|
||||
Saver* s = reinterpret_cast<Saver*>(arg);
|
||||
s->state = kFound;
|
||||
}
|
||||
|
||||
static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){
|
||||
Saver* s = reinterpret_cast<Saver*>(arg);
|
||||
ParsedInternalKey parsed_key;
|
||||
@ -337,7 +347,8 @@ void Version::Get(const ReadOptions& options,
|
||||
std::string* value,
|
||||
Status *status,
|
||||
GetStats* stats,
|
||||
const Options& db_options) {
|
||||
const Options& db_options,
|
||||
const bool no_IO) {
|
||||
Slice ikey = k.internal_key();
|
||||
Slice user_key = k.user_key();
|
||||
const Comparator* ucmp = vset_->icmp_.user_comparator();
|
||||
@ -346,6 +357,9 @@ void Version::Get(const ReadOptions& options,
|
||||
auto logger = db_options.info_log;
|
||||
|
||||
assert(status->ok() || status->IsMergeInProgress());
|
||||
if (no_IO) {
|
||||
assert(status->ok());
|
||||
}
|
||||
Saver saver;
|
||||
saver.state = status->ok()? kNotFound : kMerge;
|
||||
saver.ucmp = ucmp;
|
||||
@ -417,7 +431,8 @@ void Version::Get(const ReadOptions& options,
|
||||
FileMetaData* f = files[i];
|
||||
bool tableIO = false;
|
||||
*status = vset_->table_cache_->Get(options, f->number, f->file_size,
|
||||
ikey, &saver, SaveValue, &tableIO);
|
||||
ikey, &saver, SaveValue, &tableIO,
|
||||
MarkKeyMayExist, no_IO);
|
||||
// TODO: examine the behavior for corrupted key
|
||||
if (!status->ok()) {
|
||||
return;
|
||||
@ -2199,7 +2214,7 @@ Compaction* VersionSet::PickCompactionBySize(int level, double score) {
|
||||
|
||||
assert(level >= 0);
|
||||
assert(level+1 < NumberLevels());
|
||||
c = new Compaction(level, level+1, MaxFileSizeForLevel(level),
|
||||
c = new Compaction(level, level+1, MaxFileSizeForLevel(level+1),
|
||||
MaxGrandParentOverlapBytes(level), NumberLevels());
|
||||
c->score_ = score;
|
||||
|
||||
@ -2304,7 +2319,7 @@ Compaction* VersionSet::PickCompaction() {
|
||||
if (level != 0 || compactions_in_progress_[0].empty()) {
|
||||
if(!ParentRangeInCompaction(&f->smallest, &f->largest, level,
|
||||
&parent_index)) {
|
||||
c = new Compaction(level, level, MaxFileSizeForLevel(level),
|
||||
c = new Compaction(level, level, MaxFileSizeForLevel(level+1),
|
||||
MaxGrandParentOverlapBytes(level), NumberLevels(), true);
|
||||
c->inputs_[0].push_back(f);
|
||||
c->parent_index_ = parent_index;
|
||||
@ -2481,7 +2496,7 @@ Compaction* VersionSet::CompactRange(
|
||||
int out_level = (options_->compaction_style == kCompactionStyleUniversal) ?
|
||||
level : level+1;
|
||||
|
||||
Compaction* c = new Compaction(level, out_level, MaxFileSizeForLevel(level),
|
||||
Compaction* c = new Compaction(level, out_level, MaxFileSizeForLevel(out_level),
|
||||
MaxGrandParentOverlapBytes(level), NumberLevels());
|
||||
c->input_version_ = current_;
|
||||
c->input_version_->Ref();
|
||||
|
@ -74,7 +74,8 @@ class Version {
|
||||
int seek_file_level;
|
||||
};
|
||||
void Get(const ReadOptions&, const LookupKey& key, std::string* val,
|
||||
Status* status, GetStats* stats, const Options& db_option);
|
||||
Status* status, GetStats* stats, const Options& db_option,
|
||||
const bool no_IO = false);
|
||||
|
||||
// Adds "stats" into the current state. Returns true if a new
|
||||
// compaction may need to be triggered, false otherwise.
|
||||
|
@ -19,8 +19,8 @@ namespace leveldb {
|
||||
using std::unique_ptr;
|
||||
|
||||
// Update Makefile if you change these
|
||||
static const int kMajorVersion = 1;
|
||||
static const int kMinorVersion = 5;
|
||||
static const int kMajorVersion = 2;
|
||||
static const int kMinorVersion = 0;
|
||||
|
||||
struct Options;
|
||||
struct ReadOptions;
|
||||
@ -120,6 +120,11 @@ class DB {
|
||||
const std::vector<Slice>& keys,
|
||||
std::vector<std::string>* values) = 0;
|
||||
|
||||
// If the key definitely does not exist in the database, then this method
|
||||
// returns false. Otherwise return true. This check is potentially
|
||||
// lighter-weight than invoking DB::Get(). No IO is performed
|
||||
virtual bool KeyMayExist(const Slice& key) = 0;
|
||||
|
||||
// Return a heap-allocated iterator over the contents of the database.
|
||||
// The result of NewIterator() is initially invalid (caller must
|
||||
// call one of the Seek methods on the iterator before using it).
|
||||
|
@ -422,17 +422,6 @@ struct Options {
|
||||
// Default: true
|
||||
bool allow_os_buffer;
|
||||
|
||||
// Reading a single block from a file can cause the OS/FS to start
|
||||
// readaheads of other blocks from the file. Default: true
|
||||
// Note: Deprecated
|
||||
bool allow_readahead;
|
||||
|
||||
// The reads triggered by compaction allows data to be readahead
|
||||
// by the OS/FS. This overrides the setting of 'allow_readahead'
|
||||
// for compaction-reads. Default: true
|
||||
// Note: Deprecated
|
||||
bool allow_readahead_compactions;
|
||||
|
||||
// Allow the OS to mmap file for reading sst tables. Default: false
|
||||
bool allow_mmap_reads;
|
||||
|
||||
@ -487,6 +476,15 @@ struct Options {
|
||||
|
||||
// The options needed to support Universal Style compactions
|
||||
CompactionOptionsUniversal compaction_options_universal;
|
||||
|
||||
// Use bloom-filter for deletes when this is true.
|
||||
// db->Delete first calls KeyMayExist which checks memtable,immutable-memtable
|
||||
// and bloom-filters to determine if the key does not exist in the database.
|
||||
// If the key definitely does not exist, then the delete is a noop.KeyMayExist
|
||||
// only incurs in-memory look up. This optimization avoids writing the delete
|
||||
// to storage when appropriate.
|
||||
// Default: false
|
||||
bool deletes_check_filter_first;
|
||||
};
|
||||
|
||||
// Options that control read operations
|
||||
|
@ -58,7 +58,9 @@ enum Tickers {
|
||||
NUMBER_MULTIGET_KEYS_READ = 19,
|
||||
NUMBER_MULTIGET_BYTES_READ = 20,
|
||||
|
||||
TICKER_ENUM_MAX = 21
|
||||
NUMBER_FILTERED_DELETES = 21,
|
||||
|
||||
TICKER_ENUM_MAX = 22
|
||||
};
|
||||
|
||||
const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
||||
@ -82,7 +84,8 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
||||
{ NO_ITERATORS, "rocksdb.num.iterators" },
|
||||
{ NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get" },
|
||||
{ NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read" },
|
||||
{ NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read" }
|
||||
{ NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read" },
|
||||
{ NUMBER_FILTERED_DELETES, "rocksdb.number.deletes.filtered" }
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -322,7 +322,9 @@ Iterator* Table::NewIterator(const ReadOptions& options) const {
|
||||
Status Table::InternalGet(const ReadOptions& options, const Slice& k,
|
||||
void* arg,
|
||||
bool (*saver)(void*, const Slice&, const Slice&,
|
||||
bool)) {
|
||||
bool),
|
||||
void (*mark_key_may_exist)(void*),
|
||||
const bool no_IO) {
|
||||
Status s;
|
||||
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
|
||||
bool done = false;
|
||||
@ -338,6 +340,11 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k,
|
||||
// cross one data block, we should be fine.
|
||||
RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL);
|
||||
break;
|
||||
} else if (no_IO) {
|
||||
// Update Saver.state to Found because we are only looking for whether
|
||||
// bloom-filter can guarantee the key is not there when "no_IO"
|
||||
(*mark_key_may_exist)(arg);
|
||||
done = true;
|
||||
} else {
|
||||
bool didIO = false;
|
||||
Iterator* block_iter = BlockReader(this, options, iiter->value(),
|
||||
|
@ -86,7 +86,9 @@ class Table {
|
||||
Status InternalGet(
|
||||
const ReadOptions&, const Slice& key,
|
||||
void* arg,
|
||||
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool));
|
||||
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
|
||||
void (*mark_key_may_exist)(void*) = nullptr,
|
||||
const bool no_IO = false);
|
||||
|
||||
|
||||
void ReadMeta(const Footer& footer);
|
||||
|
@ -183,6 +183,9 @@ static uint32_t FLAGS_log2_keys_per_lock = 2; // implies 2^2 keys per lock
|
||||
// Percentage of times we want to purge redundant keys in memory before flushing
|
||||
static uint32_t FLAGS_purge_redundant_percent = 50;
|
||||
|
||||
// On true, deletes use bloom-filter and drop the delete if key not present
|
||||
static bool FLAGS_deletes_check_filter_first = false;
|
||||
|
||||
// Level0 compaction start trigger
|
||||
static int FLAGS_level0_file_num_compaction_trigger = 0;
|
||||
|
||||
@ -903,6 +906,8 @@ class StressTest {
|
||||
FLAGS_test_batches_snapshots);
|
||||
fprintf(stdout, "Purge redundant %% : %d\n",
|
||||
FLAGS_purge_redundant_percent);
|
||||
fprintf(stdout, "Deletes use filter : %d\n",
|
||||
FLAGS_deletes_check_filter_first);
|
||||
fprintf(stdout, "Num keys per lock : %d\n",
|
||||
1 << FLAGS_log2_keys_per_lock);
|
||||
|
||||
@ -959,6 +964,7 @@ class StressTest {
|
||||
options.delete_obsolete_files_period_micros =
|
||||
FLAGS_delete_obsolete_files_period_micros;
|
||||
options.max_manifest_file_size = 1024;
|
||||
options.deletes_check_filter_first = FLAGS_deletes_check_filter_first;
|
||||
static Random purge_percent(1000); // no benefit from non-determinism here
|
||||
if (purge_percent.Uniform(100) < FLAGS_purge_redundant_percent - 1) {
|
||||
options.purge_redundant_kvs_while_flush = false;
|
||||
@ -998,10 +1004,7 @@ class StressTest {
|
||||
|
||||
void PrintStatistics() {
|
||||
if (dbstats) {
|
||||
fprintf(stdout, "File opened:%ld closed:%ld errors:%ld\n",
|
||||
dbstats->getTickerCount(NO_FILE_OPENS),
|
||||
dbstats->getTickerCount(NO_FILE_CLOSES),
|
||||
dbstats->getTickerCount(NO_FILE_ERRORS));
|
||||
fprintf(stdout, "STATISTICS:\n%s\n", dbstats->ToString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1165,6 +1168,9 @@ int main(int argc, char** argv) {
|
||||
} else if (sscanf(argv[i], "--purge_redundant_percent=%d%c", &n, &junk) == 1
|
||||
&& (n >= 0 && n <= 100)) {
|
||||
FLAGS_purge_redundant_percent = n;
|
||||
} else if (sscanf(argv[i], "--deletes_check_filter_first=%d%c", &n, &junk)
|
||||
== 1 && (n == 0 || n == 1)) {
|
||||
FLAGS_deletes_check_filter_first = n;
|
||||
} else {
|
||||
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
|
||||
exit(1);
|
||||
|
148
util/ldb_cmd.cc
148
util/ldb_cmd.cc
@ -5,6 +5,7 @@
|
||||
#include "util/ldb_cmd.h"
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "db/db_impl.h"
|
||||
#include "db/log_reader.h"
|
||||
#include "db/filename.h"
|
||||
#include "db/write_batch_internal.h"
|
||||
@ -45,7 +46,7 @@ const char* LDBCommand::DELIM = " ==> ";
|
||||
LDBCommand* LDBCommand::InitFromCmdLineArgs(
|
||||
int argc,
|
||||
char** argv,
|
||||
Options options
|
||||
const Options& options
|
||||
) {
|
||||
vector<string> args;
|
||||
for (int i = 1; i < argc; i++) {
|
||||
@ -66,7 +67,7 @@ LDBCommand* LDBCommand::InitFromCmdLineArgs(
|
||||
*/
|
||||
LDBCommand* LDBCommand::InitFromCmdLineArgs(
|
||||
const vector<string>& args,
|
||||
Options options
|
||||
const Options& options
|
||||
) {
|
||||
// --x=y command line arguments are added as x->y map entries.
|
||||
map<string, string> option_map;
|
||||
@ -80,9 +81,7 @@ LDBCommand* LDBCommand::InitFromCmdLineArgs(
|
||||
|
||||
const string OPTION_PREFIX = "--";
|
||||
|
||||
for (vector<string>::const_iterator itr = args.begin();
|
||||
itr != args.end(); itr++) {
|
||||
string arg = *itr;
|
||||
for (const auto& arg : args) {
|
||||
if (arg[0] == '-' && arg[1] == '-'){
|
||||
vector<string> splits = stringSplit(arg, '=');
|
||||
if (splits.size() == 2) {
|
||||
@ -93,7 +92,7 @@ LDBCommand* LDBCommand::InitFromCmdLineArgs(
|
||||
flags.push_back(optionKey);
|
||||
}
|
||||
} else {
|
||||
cmdTokens.push_back(string(arg));
|
||||
cmdTokens.push_back(arg);
|
||||
}
|
||||
}
|
||||
|
||||
@ -119,9 +118,9 @@ LDBCommand* LDBCommand::InitFromCmdLineArgs(
|
||||
|
||||
LDBCommand* LDBCommand::SelectCommand(
|
||||
const std::string& cmd,
|
||||
vector<string>& cmdParams,
|
||||
map<string, string>& option_map,
|
||||
vector<string>& flags
|
||||
const vector<string>& cmdParams,
|
||||
const map<string, string>& option_map,
|
||||
const vector<string>& flags
|
||||
) {
|
||||
|
||||
if (cmd == GetCommand::Name()) {
|
||||
@ -150,6 +149,8 @@ LDBCommand* LDBCommand::SelectCommand(
|
||||
return new DBLoaderCommand(cmdParams, option_map, flags);
|
||||
} else if (cmd == ManifestDumpCommand::Name()) {
|
||||
return new ManifestDumpCommand(cmdParams, option_map, flags);
|
||||
} else if (cmd == InternalDumpCommand::Name()) {
|
||||
return new InternalDumpCommand(cmdParams, option_map, flags);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@ -163,7 +164,8 @@ LDBCommand* LDBCommand::SelectCommand(
|
||||
* updated.
|
||||
*/
|
||||
bool LDBCommand::ParseIntOption(const map<string, string>& options,
|
||||
string option, int& value, LDBCommandExecuteResult& exec_state) {
|
||||
const string& option, int& value,
|
||||
LDBCommandExecuteResult& exec_state) {
|
||||
|
||||
map<string, string>::const_iterator itr = option_map_.find(option);
|
||||
if (itr != option_map_.end()) {
|
||||
@ -181,6 +183,21 @@ bool LDBCommand::ParseIntOption(const map<string, string>& options,
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the specified option and fills in the value.
|
||||
* Returns true if the option is found.
|
||||
* Returns false otherwise.
|
||||
*/
|
||||
bool LDBCommand::ParseStringOption(const map<string, string>& options,
|
||||
const string& option, string* value) {
|
||||
auto itr = option_map_.find(option);
|
||||
if (itr != option_map_.end()) {
|
||||
*value = itr->second;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
Options LDBCommand::PrepareOptionsForOpenDB() {
|
||||
|
||||
Options opt = options_;
|
||||
@ -453,7 +470,7 @@ void ManifestDumpCommand::Help(string& ret) {
|
||||
ManifestDumpCommand::ManifestDumpCommand(const vector<string>& params,
|
||||
const map<string, string>& options, const vector<string>& flags) :
|
||||
LDBCommand(options, flags, false,
|
||||
BuildCmdLineOptions({ARG_VERBOSE,ARG_PATH})),
|
||||
BuildCmdLineOptions({ARG_VERBOSE, ARG_PATH, ARG_HEX})),
|
||||
verbose_(false),
|
||||
path_("")
|
||||
{
|
||||
@ -559,6 +576,115 @@ void PrintBucketCounts(const vector<uint64_t>& bucket_counts, int ttl_start,
|
||||
ReadableTime(ttl_end).c_str(), bucket_counts[num_buckets - 1]);
|
||||
}
|
||||
|
||||
const string InternalDumpCommand::ARG_COUNT_ONLY = "count_only";
|
||||
const string InternalDumpCommand::ARG_STATS = "stats";
|
||||
|
||||
InternalDumpCommand::InternalDumpCommand(const vector<string>& params,
|
||||
const map<string, string>& options,
|
||||
const vector<string>& flags) :
|
||||
LDBCommand(options, flags, true,
|
||||
BuildCmdLineOptions({ ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX,
|
||||
ARG_FROM, ARG_TO, ARG_MAX_KEYS,
|
||||
ARG_COUNT_ONLY, ARG_STATS})),
|
||||
has_from_(false),
|
||||
has_to_(false),
|
||||
max_keys_(-1),
|
||||
count_only_(false),
|
||||
print_stats_(false) {
|
||||
|
||||
has_from_ = ParseStringOption(options, ARG_FROM, &from_);
|
||||
has_to_ = ParseStringOption(options, ARG_TO, &to_);
|
||||
|
||||
ParseIntOption(options, ARG_MAX_KEYS, max_keys_, exec_state_);
|
||||
|
||||
print_stats_ = IsFlagPresent(flags, ARG_STATS);
|
||||
count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY);
|
||||
|
||||
if (is_key_hex_) {
|
||||
if (has_from_) {
|
||||
from_ = HexToString(from_);
|
||||
}
|
||||
if (has_to_) {
|
||||
to_ = HexToString(to_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InternalDumpCommand::Help(string& ret) {
|
||||
ret.append(" ");
|
||||
ret.append(InternalDumpCommand::Name());
|
||||
ret.append(HelpRangeCmdArgs());
|
||||
ret.append(" [--" + ARG_MAX_KEYS + "=<N>]");
|
||||
ret.append(" [--" + ARG_COUNT_ONLY + "]");
|
||||
ret.append(" [--" + ARG_STATS + "]");
|
||||
ret.append("\n");
|
||||
}
|
||||
|
||||
void InternalDumpCommand::DoCommand() {
|
||||
if (!db_) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (print_stats_) {
|
||||
string stats;
|
||||
if (db_->GetProperty("leveldb.stats", &stats)) {
|
||||
fprintf(stdout, "%s\n", stats.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Cast as DBImpl to get internal iterator
|
||||
DBImpl* idb = dynamic_cast<DBImpl*>(db_);
|
||||
if (!idb) {
|
||||
exec_state_ = LDBCommandExecuteResult::FAILED("DB is not DBImpl");
|
||||
return;
|
||||
}
|
||||
|
||||
// Setup internal key iterator
|
||||
auto iter = unique_ptr<Iterator>(idb->TEST_NewInternalIterator());
|
||||
Status st = iter->status();
|
||||
if (!st.ok()) {
|
||||
exec_state_ = LDBCommandExecuteResult::FAILED("Iterator error:"
|
||||
+ st.ToString());
|
||||
}
|
||||
|
||||
if (has_from_) {
|
||||
InternalKey ikey(from_, kMaxSequenceNumber, kValueTypeForSeek);
|
||||
iter->Seek(ikey.Encode());
|
||||
} else {
|
||||
iter->SeekToFirst();
|
||||
}
|
||||
|
||||
long long count = 0;
|
||||
for (; iter->Valid(); iter->Next()) {
|
||||
ParsedInternalKey ikey;
|
||||
if (!ParseInternalKey(iter->key(), &ikey)) {
|
||||
fprintf(stderr, "Internal Key [%s] parse error!\n",
|
||||
iter->key().ToString(true /* in hex*/).data());
|
||||
// TODO: add error counter
|
||||
continue;
|
||||
}
|
||||
|
||||
// If end marker was specified, we stop before it
|
||||
if (has_to_ && options_.comparator->Compare(ikey.user_key, to_) >= 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
++count;
|
||||
|
||||
if (!count_only_) {
|
||||
string key = ikey.DebugString(is_key_hex_);
|
||||
string value = iter->value().ToString(is_value_hex_);
|
||||
fprintf(stdout, "%s => %s\n", key.data(), value.data());
|
||||
}
|
||||
|
||||
// Terminate if maximum number of keys have been dumped
|
||||
if (max_keys_ > 0 && count >= max_keys_) break;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Internal keys in range: %lld\n", (long long) count);
|
||||
}
|
||||
|
||||
|
||||
const string DBDumperCommand::ARG_COUNT_ONLY = "count_only";
|
||||
const string DBDumperCommand::ARG_STATS = "stats";
|
||||
const string DBDumperCommand::ARG_TTL_BUCKET = "bucket";
|
||||
|
@ -55,13 +55,13 @@ public:
|
||||
|
||||
static LDBCommand* InitFromCmdLineArgs(
|
||||
const vector<string>& args,
|
||||
Options options = Options()
|
||||
const Options& options = Options()
|
||||
);
|
||||
|
||||
static LDBCommand* InitFromCmdLineArgs(
|
||||
int argc,
|
||||
char** argv,
|
||||
Options options = Options()
|
||||
const Options& options = Options()
|
||||
);
|
||||
|
||||
bool ValidateCmdLineOptions();
|
||||
@ -230,6 +230,8 @@ protected:
|
||||
string msg = st.ToString();
|
||||
exec_state_ = LDBCommandExecuteResult::FAILED(msg);
|
||||
}
|
||||
|
||||
options_ = opt;
|
||||
}
|
||||
|
||||
void CloseDB () {
|
||||
@ -281,13 +283,16 @@ protected:
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool ParseIntOption(const map<string, string>& options, string option,
|
||||
bool ParseIntOption(const map<string, string>& options, const string& option,
|
||||
int& value, LDBCommandExecuteResult& exec_state);
|
||||
|
||||
private:
|
||||
bool ParseStringOption(const map<string, string>& options,
|
||||
const string& option, string* value);
|
||||
|
||||
Options options_;
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* Interpret command line options and flags to determine if the key
|
||||
* should be input/output in hex.
|
||||
@ -347,9 +352,9 @@ private:
|
||||
|
||||
static LDBCommand* SelectCommand(
|
||||
const string& cmd,
|
||||
vector<string>& cmdParams,
|
||||
map<string, string>& option_map,
|
||||
vector<string>& flags
|
||||
const vector<string>& cmdParams,
|
||||
const map<string, string>& option_map,
|
||||
const vector<string>& flags
|
||||
);
|
||||
|
||||
};
|
||||
@ -397,6 +402,31 @@ private:
|
||||
static const string ARG_TTL_BUCKET;
|
||||
};
|
||||
|
||||
class InternalDumpCommand: public LDBCommand {
|
||||
public:
|
||||
static string Name() { return "idump"; }
|
||||
|
||||
InternalDumpCommand(const vector<string>& params,
|
||||
const map<string, string>& options,
|
||||
const vector<string>& flags);
|
||||
|
||||
static void Help(string& ret);
|
||||
|
||||
virtual void DoCommand();
|
||||
|
||||
private:
|
||||
bool has_from_;
|
||||
string from_;
|
||||
bool has_to_;
|
||||
string to_;
|
||||
int max_keys_;
|
||||
bool count_only_;
|
||||
bool print_stats_;
|
||||
|
||||
static const string ARG_COUNT_ONLY;
|
||||
static const string ARG_STATS;
|
||||
};
|
||||
|
||||
class DBLoaderCommand: public LDBCommand {
|
||||
public:
|
||||
static string Name() { return "load"; }
|
||||
|
@ -59,6 +59,7 @@ public:
|
||||
DBDumperCommand::Help(ret);
|
||||
DBLoaderCommand::Help(ret);
|
||||
ManifestDumpCommand::Help(ret);
|
||||
InternalDumpCommand::Help(ret);
|
||||
|
||||
fprintf(stderr, "%s\n", ret.c_str());
|
||||
}
|
||||
|
@ -65,8 +65,6 @@ Options::Options()
|
||||
manifest_preallocation_size(4 * 1024 * 1024),
|
||||
purge_redundant_kvs_while_flush(true),
|
||||
allow_os_buffer(true),
|
||||
allow_readahead(true),
|
||||
allow_readahead_compactions(true),
|
||||
allow_mmap_reads(false),
|
||||
allow_mmap_writes(true),
|
||||
is_fd_close_on_exec(true),
|
||||
@ -78,6 +76,7 @@ Options::Options()
|
||||
use_adaptive_mutex(false),
|
||||
bytes_per_sync(0),
|
||||
compaction_style(kCompactionStyleLevel) {
|
||||
deletes_check_filter_first(false) {
|
||||
}
|
||||
|
||||
static const char* const access_hints[] = {
|
||||
@ -127,13 +126,10 @@ Options::Dump(Logger* log) const
|
||||
Log(log," Options.db_stats_log_interval: %d",
|
||||
db_stats_log_interval);
|
||||
Log(log," Options.allow_os_buffer: %d", allow_os_buffer);
|
||||
Log(log," Options.allow_readahead: %d", allow_readahead);
|
||||
Log(log," Options.allow_mmap_reads: %d", allow_mmap_reads);
|
||||
Log(log," Options.allow_mmap_writes: %d", allow_mmap_writes);
|
||||
Log(log," Options.min_write_buffer_number_to_merge: %d",
|
||||
min_write_buffer_number_to_merge);
|
||||
Log(log," Options.allow_readahead_compactions: %d",
|
||||
allow_readahead_compactions);
|
||||
Log(log," Options.purge_redundant_kvs_while_flush: %d",
|
||||
purge_redundant_kvs_while_flush);
|
||||
Log(log," Options.compression_opts.window_bits: %d",
|
||||
@ -194,10 +190,6 @@ Options::Dump(Logger* log) const
|
||||
purge_redundant_kvs_while_flush);
|
||||
Log(log," Options.allow_os_buffer: %d",
|
||||
allow_os_buffer);
|
||||
Log(log," Options.allow_readahead: %d",
|
||||
allow_readahead);
|
||||
Log(log," Options.allow_readahead_compactions: %d",
|
||||
allow_readahead_compactions);
|
||||
Log(log," Options.allow_mmap_reads: %d",
|
||||
allow_mmap_reads);
|
||||
Log(log," Options.allow_mmap_writes: %d",
|
||||
@ -220,6 +212,8 @@ Options::Dump(Logger* log) const
|
||||
bytes_per_sync);
|
||||
Log(log," Options.compaction_style: %d",
|
||||
compaction_style);
|
||||
Log(log," Options.deletes_check_filter_first: %d",
|
||||
deletes_check_filter_first);
|
||||
Log(log," Options.compaction_options_universal.size_ratio: %d",
|
||||
compaction_options_universal.size_ratio);
|
||||
Log(log," Options.compaction_options_universal.min_merge_width: %d",
|
||||
|
@ -158,6 +158,10 @@ std::vector<Status> DBWithTTL::MultiGet(const ReadOptions& options,
|
||||
supported with TTL"));
|
||||
}
|
||||
|
||||
bool DBWithTTL::KeyMayExist(const Slice& key) {
|
||||
return db_->KeyMayExist(key);
|
||||
}
|
||||
|
||||
Status DBWithTTL::Delete(const WriteOptions& wopts, const Slice& key) {
|
||||
return db_->Delete(wopts, key);
|
||||
}
|
||||
|
@ -33,6 +33,8 @@ class DBWithTTL : public DB, CompactionFilter {
|
||||
const std::vector<Slice>& keys,
|
||||
std::vector<std::string>* values);
|
||||
|
||||
virtual bool KeyMayExist(const Slice& key);
|
||||
|
||||
virtual Status Delete(const WriteOptions& wopts, const Slice& key);
|
||||
|
||||
virtual Status Merge(const WriteOptions& options,
|
||||
|
Loading…
Reference in New Issue
Block a user