[column families] Iterator and MultiGet

Summary: Add support for different column families in the Iterator and MultiGet code paths.

Test Plan: make check

Reviewers: dhruba, haobo, kailiu, sdong

CC: leveldb

Differential Revision: https://reviews.facebook.net/D15849
This commit is contained in:
Igor Canadi 2014-02-03 15:28:03 -08:00
parent 2a9271b403
commit 0e22badc08
7 changed files with 202 additions and 164 deletions

View File

@ -32,9 +32,10 @@ SuperVersion* SuperVersion::Ref() {
} }
bool SuperVersion::Unref() { bool SuperVersion::Unref() {
assert(refs > 0);
// fetch_sub returns the previous value of ref // fetch_sub returns the previous value of ref
return refs.fetch_sub(1, std::memory_order_relaxed) == 1; uint32_t previous_refs = refs.fetch_sub(1, std::memory_order_relaxed);
assert(previous_refs > 0);
return previous_refs == 1;
} }
void SuperVersion::Cleanup() { void SuperVersion::Cleanup() {

View File

@ -1423,10 +1423,6 @@ int DBImpl::Level0StopWriteTrigger(const ColumnFamilyHandle& column_family) {
return cfd->options()->level0_stop_writes_trigger; return cfd->options()->level0_stop_writes_trigger;
} }
uint64_t DBImpl::CurrentVersionNumber() const {
return default_cfd_->GetSuperVersionNumber();
}
Status DBImpl::Flush(const FlushOptions& options, Status DBImpl::Flush(const FlushOptions& options,
const ColumnFamilyHandle& column_family) { const ColumnFamilyHandle& column_family) {
mutex_.Lock(); mutex_.Lock();
@ -2724,12 +2720,8 @@ static void CleanupIteratorState(void* arg1, void* arg2) {
} // namespace } // namespace
Iterator* DBImpl::NewInternalIterator(const ReadOptions& options, Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
SequenceNumber* latest_snapshot) { ColumnFamilyData* cfd,
mutex_.Lock(); SuperVersion* super_version) {
*latest_snapshot = versions_->LastSequence();
SuperVersion* super_version = default_cfd_->GetSuperVersion()->Ref();
mutex_.Unlock();
std::vector<Iterator*> iterator_list; std::vector<Iterator*> iterator_list;
// Collect iterator for mutable mem // Collect iterator for mutable mem
iterator_list.push_back(super_version->mem->NewIterator(options)); iterator_list.push_back(super_version->mem->NewIterator(options));
@ -2738,9 +2730,8 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
// Collect iterators for files in L0 - Ln // Collect iterators for files in L0 - Ln
super_version->current->AddIterators(options, storage_options_, super_version->current->AddIterators(options, storage_options_,
&iterator_list); &iterator_list);
Iterator* internal_iter = Iterator* internal_iter = NewMergingIterator(
NewMergingIterator(&default_cfd_->internal_comparator(), &cfd->internal_comparator(), &iterator_list[0], iterator_list.size());
&iterator_list[0], iterator_list.size());
IterState* cleanup = new IterState(this, &mutex_, super_version); IterState* cleanup = new IterState(this, &mutex_, super_version);
internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr); internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr);
@ -2749,18 +2740,20 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
} }
Iterator* DBImpl::TEST_NewInternalIterator() { Iterator* DBImpl::TEST_NewInternalIterator() {
SequenceNumber ignored; mutex_.Lock();
return NewInternalIterator(ReadOptions(), &ignored); SuperVersion* super_version = default_cfd_->GetSuperVersion()->Ref();
mutex_.Unlock();
return NewInternalIterator(ReadOptions(), default_cfd_, super_version);
} }
std::pair<Iterator*, Iterator*> DBImpl::GetTailingIteratorPair( std::pair<Iterator*, Iterator*> DBImpl::GetTailingIteratorPair(
const ReadOptions& options, const ReadOptions& options, ColumnFamilyData* cfd,
uint64_t* superversion_number) { uint64_t* superversion_number) {
mutex_.Lock(); mutex_.Lock();
SuperVersion* super_version = default_cfd_->GetSuperVersion()->Ref(); SuperVersion* super_version = cfd->GetSuperVersion()->Ref();
if (superversion_number != nullptr) { if (superversion_number != nullptr) {
*superversion_number = CurrentVersionNumber(); *superversion_number = cfd->GetSuperVersionNumber();
} }
mutex_.Unlock(); mutex_.Unlock();
@ -2772,8 +2765,8 @@ std::pair<Iterator*, Iterator*> DBImpl::GetTailingIteratorPair(
std::vector<Iterator*> list; std::vector<Iterator*> list;
super_version->imm->AddIterators(options, &list); super_version->imm->AddIterators(options, &list);
super_version->current->AddIterators(options, storage_options_, &list); super_version->current->AddIterators(options, storage_options_, &list);
Iterator* immutable_iter = NewMergingIterator( Iterator* immutable_iter =
&default_cfd_->internal_comparator(), &list[0], list.size()); NewMergingIterator(&cfd->internal_comparator(), &list[0], list.size());
// create a DBIter that only uses memtable content; see NewIterator() // create a DBIter that only uses memtable content; see NewIterator()
immutable_iter = NewDBIterator(&dbname_, env_, options_, user_comparator(), immutable_iter = NewDBIterator(&dbname_, env_, options_, user_comparator(),
@ -2910,84 +2903,106 @@ std::vector<Status> DBImpl::MultiGet(
StopWatch sw(env_, options_.statistics.get(), DB_MULTIGET, false); StopWatch sw(env_, options_.statistics.get(), DB_MULTIGET, false);
SequenceNumber snapshot; SequenceNumber snapshot;
struct MultiGetColumnFamilyData {
SuperVersion* super_version;
Version::GetStats stats;
bool have_stat_update = false;
};
std::unordered_map<uint32_t, MultiGetColumnFamilyData*> multiget_cf_data;
// fill up and allocate outside of mutex
for (auto cf : column_family) {
if (multiget_cf_data.find(cf.id) == multiget_cf_data.end()) {
multiget_cf_data.insert({cf.id, new MultiGetColumnFamilyData()});
}
}
mutex_.Lock(); mutex_.Lock();
if (options.snapshot != nullptr) { if (options.snapshot != nullptr) {
snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_; snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_;
} else { } else {
snapshot = versions_->LastSequence(); snapshot = versions_->LastSequence();
} }
for (auto mgd_iter : multiget_cf_data) {
SuperVersion* get_version = default_cfd_->GetSuperVersion()->Ref(); auto cfd = versions_->GetColumnFamilySet()->GetColumnFamily(mgd_iter.first);
assert(cfd != nullptr);
mgd_iter.second->super_version = cfd->GetSuperVersion()->Ref();
}
mutex_.Unlock(); mutex_.Unlock();
bool have_stat_update = false;
Version::GetStats stats;
// Contain a list of merge operations if merge occurs. // Contain a list of merge operations if merge occurs.
MergeContext merge_context; MergeContext merge_context;
// Note: this always resizes the values array // Note: this always resizes the values array
int numKeys = keys.size(); size_t num_keys = keys.size();
std::vector<Status> statList(numKeys); std::vector<Status> stat_list(num_keys);
values->resize(numKeys); values->resize(num_keys);
// Keep track of bytes that we read for statistics-recording later // Keep track of bytes that we read for statistics-recording later
uint64_t bytesRead = 0; uint64_t bytes_read = 0;
// For each of the given keys, apply the entire "get" process as follows: // For each of the given keys, apply the entire "get" process as follows:
// First look in the memtable, then in the immutable memtable (if any). // First look in the memtable, then in the immutable memtable (if any).
// s is both in/out. When in, s could either be OK or MergeInProgress. // s is both in/out. When in, s could either be OK or MergeInProgress.
// merge_operands will contain the sequence of merges in the latter case. // merge_operands will contain the sequence of merges in the latter case.
for (int i=0; i<numKeys; ++i) { for (size_t i = 0; i < num_keys; ++i) {
merge_context.Clear(); merge_context.Clear();
Status& s = statList[i]; Status& s = stat_list[i];
std::string* value = &(*values)[i]; std::string* value = &(*values)[i];
LookupKey lkey(keys[i], snapshot); LookupKey lkey(keys[i], snapshot);
if (get_version->mem->Get(lkey, value, &s, merge_context, options_)) { auto mgd_iter = multiget_cf_data.find(column_family[i].id);
assert(mgd_iter != multiget_cf_data.end());
auto mgd = mgd_iter->second;
auto super_version = mgd->super_version;
if (super_version->mem->Get(lkey, value, &s, merge_context, options_)) {
// Done // Done
} else if (get_version->imm->Get(lkey, value, &s, merge_context, } else if (super_version->imm->Get(lkey, value, &s, merge_context,
options_)) { options_)) {
// Done // Done
} else { } else {
get_version->current->Get(options, lkey, value, &s, &merge_context, super_version->current->Get(options, lkey, value, &s, &merge_context,
&stats, options_); &mgd->stats, options_);
have_stat_update = true; mgd->have_stat_update = true;
} }
if (s.ok()) { if (s.ok()) {
bytesRead += value->size(); bytes_read += value->size();
} }
} }
bool delete_get_version = false; autovector<SuperVersion*> superversions_to_delete;
if (!options_.disable_seek_compaction && have_stat_update) {
mutex_.Lock(); bool schedule_flush_or_compaction = false;
if (get_version->current->UpdateStats(stats)) { mutex_.Lock();
MaybeScheduleFlushOrCompaction(); for (auto mgd_iter : multiget_cf_data) {
auto mgd = mgd_iter.second;
if (!options_.disable_seek_compaction && mgd->have_stat_update) {
if (mgd->super_version->current->UpdateStats(mgd->stats)) {
schedule_flush_or_compaction = true;
}
} }
if (get_version->Unref()) { if (mgd->super_version->Unref()) {
get_version->Cleanup(); mgd->super_version->Cleanup();
delete_get_version = true; superversions_to_delete.push_back(mgd->super_version);
}
mutex_.Unlock();
} else {
if (get_version->Unref()) {
mutex_.Lock();
get_version->Cleanup();
mutex_.Unlock();
delete_get_version = true;
} }
} }
if (delete_get_version) { if (schedule_flush_or_compaction) {
delete get_version; MaybeScheduleFlushOrCompaction();
}
mutex_.Unlock();
for (auto td : superversions_to_delete) {
delete td;
}
for (auto mgd : multiget_cf_data) {
delete mgd.second;
} }
RecordTick(options_.statistics.get(), NUMBER_MULTIGET_CALLS); RecordTick(options_.statistics.get(), NUMBER_MULTIGET_CALLS);
RecordTick(options_.statistics.get(), NUMBER_MULTIGET_KEYS_READ, numKeys); RecordTick(options_.statistics.get(), NUMBER_MULTIGET_KEYS_READ, num_keys);
RecordTick(options_.statistics.get(), NUMBER_MULTIGET_BYTES_READ, bytesRead); RecordTick(options_.statistics.get(), NUMBER_MULTIGET_BYTES_READ, bytes_read);
return statList; return stat_list;
} }
Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options, Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options,
@ -3056,19 +3071,28 @@ bool DBImpl::KeyMayExist(const ReadOptions& options,
Iterator* DBImpl::NewIterator(const ReadOptions& options, Iterator* DBImpl::NewIterator(const ReadOptions& options,
const ColumnFamilyHandle& column_family) { const ColumnFamilyHandle& column_family) {
Iterator* iter; SequenceNumber latest_snapshot = 0;
SuperVersion* super_version = nullptr;
mutex_.Lock();
auto cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family.id);
assert(cfd != nullptr);
if (!options.tailing) {
super_version = cfd->GetSuperVersion()->Ref();
latest_snapshot = versions_->LastSequence();
}
mutex_.Unlock();
Iterator* iter;
if (options.tailing) { if (options.tailing) {
iter = new TailingIterator(this, options, user_comparator()); iter = new TailingIterator(this, options, cfd);
} else { } else {
SequenceNumber latest_snapshot; iter = NewInternalIterator(options, cfd, super_version);
iter = NewInternalIterator(options, &latest_snapshot);
iter = NewDBIterator( iter = NewDBIterator(
&dbname_, env_, options_, user_comparator(), iter, &dbname_, env_, options_, cfd->user_comparator(), iter,
(options.snapshot != nullptr (options.snapshot != nullptr
? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_ ? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
: latest_snapshot)); : latest_snapshot));
} }
if (options.prefix) { if (options.prefix) {
@ -3529,6 +3553,7 @@ bool DBImpl::GetProperty(const ColumnFamilyHandle& column_family,
value->clear(); value->clear();
MutexLock l(&mutex_); MutexLock l(&mutex_);
auto cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family.id); auto cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family.id);
assert(cfd != nullptr);
return internal_stats_.GetProperty(property, value, cfd); return internal_stats_.GetProperty(property, value, cfd);
} }
@ -3538,7 +3563,10 @@ void DBImpl::GetApproximateSizes(const ColumnFamilyHandle& column_family,
Version* v; Version* v;
{ {
MutexLock l(&mutex_); MutexLock l(&mutex_);
v = default_cfd_->current(); auto cfd =
versions_->GetColumnFamilySet()->GetColumnFamily(column_family.id);
assert(cfd != nullptr);
v = cfd->current();
v->Ref(); v->Ref();
} }

View File

@ -257,10 +257,8 @@ class DBImpl : public DB {
return internal_comparator_.user_comparator(); return internal_comparator_.user_comparator();
} }
ColumnFamilyData* GetDefaultColumnFamily() { return default_cfd_; } Iterator* NewInternalIterator(const ReadOptions&, ColumnFamilyData* cfd,
SuperVersion* super_version);
Iterator* NewInternalIterator(const ReadOptions&,
SequenceNumber* latest_snapshot);
private: private:
friend class DB; friend class DB;
@ -367,16 +365,13 @@ class DBImpl : public DB {
// hold the data set. // hold the data set.
Status ReFitLevel(ColumnFamilyData* cfd, int level, int target_level = -1); Status ReFitLevel(ColumnFamilyData* cfd, int level, int target_level = -1);
// Returns the current SuperVersion number.
uint64_t CurrentVersionNumber() const;
// Returns a pair of iterators (mutable-only and immutable-only) used // Returns a pair of iterators (mutable-only and immutable-only) used
// internally by TailingIterator and stores CurrentVersionNumber() in // internally by TailingIterator and stores cfd->GetSuperVersionNumber() in
// *superversion_number. These iterators are always up-to-date, i.e. can // *superversion_number. These iterators are always up-to-date, i.e. can
// be used to read new data. // be used to read new data.
std::pair<Iterator*, Iterator*> GetTailingIteratorPair( std::pair<Iterator*, Iterator*> GetTailingIteratorPair(
const ReadOptions& options, const ReadOptions& options, ColumnFamilyData* cfd,
uint64_t* superversion_number); uint64_t* superversion_number);
// Constant after construction // Constant after construction
const InternalFilterPolicy internal_filter_policy_; const InternalFilterPolicy internal_filter_policy_;

View File

@ -29,6 +29,7 @@
#include "db/write_batch_internal.h" #include "db/write_batch_internal.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/column_family.h"
#include "rocksdb/status.h" #include "rocksdb/status.h"
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "rocksdb/merge_operator.h" #include "rocksdb/merge_operator.h"
@ -57,7 +58,8 @@ Status DBImplReadOnly::Get(const ReadOptions& options,
const Slice& key, std::string* value) { const Slice& key, std::string* value) {
Status s; Status s;
SequenceNumber snapshot = versions_->LastSequence(); SequenceNumber snapshot = versions_->LastSequence();
SuperVersion* super_version = GetDefaultColumnFamily()->GetSuperVersion(); auto cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family.id);
SuperVersion* super_version = cfd->GetSuperVersion();
MergeContext merge_context; MergeContext merge_context;
LookupKey lkey(key, snapshot); LookupKey lkey(key, snapshot);
if (super_version->mem->Get(lkey, value, &s, merge_context, options_)) { if (super_version->mem->Get(lkey, value, &s, merge_context, options_)) {
@ -69,14 +71,18 @@ Status DBImplReadOnly::Get(const ReadOptions& options,
return s; return s;
} }
Iterator* DBImplReadOnly::NewIterator(const ReadOptions& options) { Iterator* DBImplReadOnly::NewIterator(const ReadOptions& options,
SequenceNumber latest_snapshot; const ColumnFamilyHandle& column_family) {
Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot); auto cfd = versions_->GetColumnFamilySet()->GetColumnFamily(column_family.id);
assert(cfd != nullptr);
SuperVersion* super_version = cfd->GetSuperVersion()->Ref();
SequenceNumber latest_snapshot = versions_->LastSequence();
Iterator* internal_iter = NewInternalIterator(options, cfd, super_version);
return NewDBIterator( return NewDBIterator(
&dbname_, env_, options_, user_comparator(),internal_iter, &dbname_, env_, options_, user_comparator(), internal_iter,
(options.snapshot != nullptr (options.snapshot != nullptr
? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_ ? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
: latest_snapshot)); : latest_snapshot));
} }
Status DB::OpenForReadOnly(const Options& options, const std::string& dbname, Status DB::OpenForReadOnly(const Options& options, const std::string& dbname,

View File

@ -12,6 +12,8 @@
#include <deque> #include <deque>
#include <set> #include <set>
#include <vector>
#include <string>
#include "db/dbformat.h" #include "db/dbformat.h"
#include "db/log_writer.h" #include "db/log_writer.h"
#include "db/snapshot.h" #include "db/snapshot.h"
@ -23,79 +25,80 @@
namespace rocksdb { namespace rocksdb {
class DBImplReadOnly : public DBImpl { class DBImplReadOnly : public DBImpl {
public: public:
DBImplReadOnly(const Options& options, const std::string& dbname); DBImplReadOnly(const Options& options, const std::string& dbname);
virtual ~DBImplReadOnly(); virtual ~DBImplReadOnly();
// Implementations of the DB interface // Implementations of the DB interface
using DB::Get; using DB::Get;
virtual Status Get(const ReadOptions& options, virtual Status Get(const ReadOptions& options,
const ColumnFamilyHandle& column_family, const Slice& key, const ColumnFamilyHandle& column_family, const Slice& key,
std::string* value); std::string* value);
// TODO: Implement ReadOnly MultiGet? // TODO: Implement ReadOnly MultiGet?
using DBImpl::NewIterator; using DBImpl::NewIterator;
virtual Iterator* NewIterator(const ReadOptions&); virtual Iterator* NewIterator(const ReadOptions&,
const ColumnFamilyHandle& column_family);
virtual Status NewIterators( virtual Status NewIterators(
const ReadOptions& options, const ReadOptions& options,
const std::vector<ColumnFamilyHandle>& column_family, const std::vector<ColumnFamilyHandle>& column_family,
std::vector<Iterator*>* iterators) { std::vector<Iterator*>* iterators) {
// TODO // TODO
return Status::NotSupported("Not supported yet."); return Status::NotSupported("Not supported yet.");
} }
using DBImpl::Put; using DBImpl::Put;
virtual Status Put(const WriteOptions& options, virtual Status Put(const WriteOptions& options,
const ColumnFamilyHandle& column_family, const Slice& key, const ColumnFamilyHandle& column_family, const Slice& key,
const Slice& value) { const Slice& value) {
return Status::NotSupported("Not supported operation in read only mode."); return Status::NotSupported("Not supported operation in read only mode.");
} }
using DBImpl::Merge; using DBImpl::Merge;
virtual Status Merge(const WriteOptions& options, virtual Status Merge(const WriteOptions& options,
const ColumnFamilyHandle& column_family, const Slice& key,
const Slice& value) {
return Status::NotSupported("Not supported operation in read only mode.");
}
using DBImpl::Delete;
virtual Status Delete(const WriteOptions& options,
const ColumnFamilyHandle& column_family, const ColumnFamilyHandle& column_family,
const Slice& key) { const Slice& key, const Slice& value) {
return Status::NotSupported("Not supported operation in read only mode."); return Status::NotSupported("Not supported operation in read only mode.");
} }
virtual Status Write(const WriteOptions& options, WriteBatch* updates) { using DBImpl::Delete;
return Status::NotSupported("Not supported operation in read only mode."); virtual Status Delete(const WriteOptions& options,
} const ColumnFamilyHandle& column_family,
using DBImpl::CompactRange; const Slice& key) {
virtual Status CompactRange(const ColumnFamilyHandle& column_family, return Status::NotSupported("Not supported operation in read only mode.");
const Slice* begin, const Slice* end, }
bool reduce_level = false, int target_level = -1) { virtual Status Write(const WriteOptions& options, WriteBatch* updates) {
return Status::NotSupported("Not supported operation in read only mode."); return Status::NotSupported("Not supported operation in read only mode.");
} }
virtual Status DisableFileDeletions() { using DBImpl::CompactRange;
return Status::NotSupported("Not supported operation in read only mode."); virtual Status CompactRange(const ColumnFamilyHandle& column_family,
} const Slice* begin, const Slice* end,
virtual Status EnableFileDeletions(bool force) { bool reduce_level = false,
return Status::NotSupported("Not supported operation in read only mode."); int target_level = -1) {
} return Status::NotSupported("Not supported operation in read only mode.");
virtual Status GetLiveFiles(std::vector<std::string>&, }
uint64_t* manifest_file_size, virtual Status DisableFileDeletions() {
bool flush_memtable = true) { return Status::NotSupported("Not supported operation in read only mode.");
return Status::NotSupported("Not supported operation in read only mode."); }
} virtual Status EnableFileDeletions(bool force) {
using DBImpl::Flush; return Status::NotSupported("Not supported operation in read only mode.");
virtual Status Flush(const FlushOptions& options, }
const ColumnFamilyHandle& column_family) { virtual Status GetLiveFiles(std::vector<std::string>&,
return Status::NotSupported("Not supported operation in read only mode."); uint64_t* manifest_file_size,
} bool flush_memtable = true) {
return Status::NotSupported("Not supported operation in read only mode.");
}
using DBImpl::Flush;
virtual Status Flush(const FlushOptions& options,
const ColumnFamilyHandle& column_family) {
return Status::NotSupported("Not supported operation in read only mode.");
}
private: private:
friend class DB; friend class DB;
// No copying allowed // No copying allowed
DBImplReadOnly(const DBImplReadOnly&); DBImplReadOnly(const DBImplReadOnly&);
void operator=(const DBImplReadOnly&); void operator=(const DBImplReadOnly&);
}; };
} }

View File

@ -8,15 +8,19 @@
#include <string> #include <string>
#include <utility> #include <utility>
#include "db/db_impl.h" #include "db/db_impl.h"
#include "db/column_family.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h" #include "rocksdb/slice_transform.h"
namespace rocksdb { namespace rocksdb {
TailingIterator::TailingIterator(DBImpl* db, const ReadOptions& options, TailingIterator::TailingIterator(DBImpl* db, const ReadOptions& options,
const Comparator* comparator) ColumnFamilyData* cfd)
: db_(db), options_(options), comparator_(comparator), : db_(db),
version_number_(0), current_(nullptr), options_(options),
cfd_(cfd),
version_number_(0),
current_(nullptr),
status_(Status::InvalidArgument("Seek() not called on this iterator")) {} status_(Status::InvalidArgument("Seek() not called on this iterator")) {}
bool TailingIterator::Valid() const { bool TailingIterator::Valid() const {
@ -53,10 +57,9 @@ void TailingIterator::Seek(const Slice& target) {
// 'target' -- in this case, prev_key_ is included in the interval, so // 'target' -- in this case, prev_key_ is included in the interval, so
// prev_inclusive_ has to be set. // prev_inclusive_ has to be set.
if (!is_prev_set_ || const Comparator* cmp = cfd_->user_comparator();
comparator_->Compare(prev_key_, target) >= !is_prev_inclusive_ || if (!is_prev_set_ || cmp->Compare(prev_key_, target) >= !is_prev_inclusive_ ||
(immutable_->Valid() && (immutable_->Valid() && cmp->Compare(target, immutable_->key()) > 0) ||
comparator_->Compare(target, immutable_->key()) > 0) ||
(options_.prefix_seek && !IsSamePrefix(target))) { (options_.prefix_seek && !IsSamePrefix(target))) {
SeekImmutable(target); SeekImmutable(target);
} }
@ -121,7 +124,7 @@ void TailingIterator::SeekToLast() {
void TailingIterator::CreateIterators() { void TailingIterator::CreateIterators() {
std::pair<Iterator*, Iterator*> iters = std::pair<Iterator*, Iterator*> iters =
db_->GetTailingIteratorPair(options_, &version_number_); db_->GetTailingIteratorPair(options_, cfd_, &version_number_);
assert(iters.first && iters.second); assert(iters.first && iters.second);
@ -137,9 +140,10 @@ void TailingIterator::UpdateCurrent() {
if (mutable_->Valid()) { if (mutable_->Valid()) {
current_ = mutable_.get(); current_ = mutable_.get();
} }
const Comparator* cmp = cfd_->user_comparator();
if (immutable_->Valid() && if (immutable_->Valid() &&
(current_ == nullptr || (current_ == nullptr ||
comparator_->Compare(immutable_->key(), current_->key()) < 0)) { cmp->Compare(immutable_->key(), current_->key()) < 0)) {
current_ = immutable_.get(); current_ = immutable_.get();
} }
@ -151,11 +155,11 @@ void TailingIterator::UpdateCurrent() {
bool TailingIterator::IsCurrentVersion() const { bool TailingIterator::IsCurrentVersion() const {
return mutable_ != nullptr && immutable_ != nullptr && return mutable_ != nullptr && immutable_ != nullptr &&
version_number_ == db_->CurrentVersionNumber(); version_number_ == cfd_->GetSuperVersionNumber();
} }
bool TailingIterator::IsSamePrefix(const Slice& target) const { bool TailingIterator::IsSamePrefix(const Slice& target) const {
const SliceTransform* extractor = db_->options_.prefix_extractor; const SliceTransform* extractor = cfd_->options()->prefix_extractor;
assert(extractor); assert(extractor);
assert(is_prev_set_); assert(is_prev_set_);

View File

@ -13,6 +13,7 @@
namespace rocksdb { namespace rocksdb {
class DBImpl; class DBImpl;
class ColumnFamilyData;
/** /**
* TailingIterator is a special type of iterator that doesn't use an (implicit) * TailingIterator is a special type of iterator that doesn't use an (implicit)
@ -25,7 +26,7 @@ class DBImpl;
class TailingIterator : public Iterator { class TailingIterator : public Iterator {
public: public:
TailingIterator(DBImpl* db, const ReadOptions& options, TailingIterator(DBImpl* db, const ReadOptions& options,
const Comparator* comparator); ColumnFamilyData* cfd);
virtual ~TailingIterator() {} virtual ~TailingIterator() {}
virtual bool Valid() const override; virtual bool Valid() const override;
@ -41,7 +42,7 @@ class TailingIterator : public Iterator {
private: private:
DBImpl* const db_; DBImpl* const db_;
const ReadOptions options_; const ReadOptions options_;
const Comparator* const comparator_; ColumnFamilyData* const cfd_;
uint64_t version_number_; uint64_t version_number_;
// TailingIterator merges the contents of the two iterators below (one using // TailingIterator merges the contents of the two iterators below (one using