plain table reader: avoid re-reading the same position for index and data in non-mmap mode

Summary: In non-mmap mode, the plain table reader can issue two pread() calls for the same file location: one while checking the index and another while reading the actual data. By reusing the same key decoder for both steps, we reuse its buffer for both and avoid the second read.

Test Plan: Run unit tests. Run table_reader_bench and verify with strace that the repeated reads disappear.

Reviewers: anthony, yhchiang, rven, kradhakrishnan, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D50949
This commit is contained in:
sdong 2015-11-17 18:29:40 -08:00
parent d5239f8709
commit 9d0b8f19d9
3 changed files with 18 additions and 19 deletions

View File

@ -409,7 +409,8 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
return Status::OK();
}
Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
Status PlainTableReader::GetOffset(PlainTableKeyDecoder* decoder,
const Slice& target, const Slice& prefix,
uint32_t prefix_hash, bool& prefix_matched,
uint32_t* offset) const {
prefix_matched = false;
@ -435,15 +436,12 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
return Status::Corruption(Slice());
}
PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
ioptions_.prefix_extractor);
// The key is between [low, high). Do a binary search between it.
while (high - low > 1) {
uint32_t mid = (high + low) / 2;
uint32_t file_offset = GetFixed32Element(base_ptr, mid);
uint32_t tmp;
Status s = decoder.NextKeyNoValue(file_offset, &mid_key, nullptr, &tmp);
Status s = decoder->NextKeyNoValue(file_offset, &mid_key, nullptr, &tmp);
if (!s.ok()) {
return s;
}
@ -468,7 +466,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
ParsedInternalKey low_key;
uint32_t tmp;
uint32_t low_key_offset = GetFixed32Element(base_ptr, low);
Status s = decoder.NextKeyNoValue(low_key_offset, &low_key, nullptr, &tmp);
Status s = decoder->NextKeyNoValue(low_key_offset, &low_key, nullptr, &tmp);
if (!s.ok()) {
return s;
}
@ -559,8 +557,10 @@ Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target,
}
uint32_t offset;
bool prefix_match;
Status s =
GetOffset(target, prefix_slice, prefix_hash, prefix_match, &offset);
PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
ioptions_.prefix_extractor);
Status s = GetOffset(&decoder, target, prefix_slice, prefix_hash,
prefix_match, &offset);
if (!s.ok()) {
return s;
@ -571,8 +571,6 @@ Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target,
return Status::Corruption(Slice());
}
Slice found_value;
PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
ioptions_.prefix_extractor);
while (offset < file_info_.data_end_offset) {
s = Next(&decoder, &offset, &found_key, nullptr, &found_value);
if (!s.ok()) {
@ -662,8 +660,8 @@ void PlainTableIterator::Seek(const Slice& target) {
}
}
bool prefix_match;
status_ = table_->GetOffset(target, prefix_slice, prefix_hash, prefix_match,
&next_offset_);
status_ = table_->GetOffset(&decoder_, target, prefix_slice, prefix_hash,
prefix_match, &next_offset_);
if (!status_.ok()) {
offset_ = next_offset_ = table_->file_info_.data_end_offset;
return;

View File

@ -219,9 +219,9 @@ class PlainTableReader: public TableReader {
// Get file offset for key target.
// return value prefix_matched is set to true if the offset is confirmed
// for a key with the same prefix as target.
Status GetOffset(const Slice& target, const Slice& prefix,
uint32_t prefix_hash, bool& prefix_matched,
uint32_t* offset) const;
Status GetOffset(PlainTableKeyDecoder* decoder, const Slice& target,
const Slice& prefix, uint32_t prefix_hash,
bool& prefix_matched, uint32_t* offset) const;
bool IsTotalOrderMode() const { return (prefix_extractor_ == nullptr); }

View File

@ -258,6 +258,7 @@ DEFINE_bool(iterator, false, "For test iterator");
DEFINE_bool(through_db, false, "If enable, a DB instance will be created and "
"the query will be against DB. Otherwise, will be directly against "
"a table reader.");
DEFINE_bool(mmap_read, true, "Whether use mmap read");
DEFINE_string(table_factory, "block_based",
"Table factory to use: `block_based` (default), `plain_table` or "
"`cuckoo_hash`.");
@ -283,8 +284,8 @@ int main(int argc, char** argv) {
if (FLAGS_table_factory == "cuckoo_hash") {
#ifndef ROCKSDB_LITE
options.allow_mmap_reads = true;
env_options.use_mmap_reads = true;
options.allow_mmap_reads = FLAGS_mmap_read;
env_options.use_mmap_reads = FLAGS_mmap_read;
rocksdb::CuckooTableOptions table_options;
table_options.hash_table_ratio = 0.75;
tf.reset(rocksdb::NewCuckooTableFactory(table_options));
@ -294,8 +295,8 @@ int main(int argc, char** argv) {
#endif // ROCKSDB_LITE
} else if (FLAGS_table_factory == "plain_table") {
#ifndef ROCKSDB_LITE
options.allow_mmap_reads = true;
env_options.use_mmap_reads = true;
options.allow_mmap_reads = FLAGS_mmap_read;
env_options.use_mmap_reads = FLAGS_mmap_read;
rocksdb::PlainTableOptions plain_table_options;
plain_table_options.user_key_len = 16;