CappedFixTransform: return fixed length prefix, or full key if key is shorter than the fixed length
Summary: Add CappedFixTransform, which is the same as fixed length prefix extractor, except that when slice is shorter than the fixed length, it will use the full key. Test Plan: Add a test case for db_test options_test and a new test Reviewers: yhchiang, rven, igor Reviewed By: igor Subscribers: MarkCallaghan, leveldb, dhruba, yoshinorim Differential Revision: https://reviews.facebook.net/D31887
This commit is contained in:
parent
6c6037f60c
commit
5917de0bae
@ -12,6 +12,7 @@
|
||||
* Added new block based table format (version 2), which you can enable by setting BlockBasedTableOptions.format_version = 2. This format changes how we encode size information in compressed blocks and should help with memory allocations if you're using Zlib or BZip2 compressions.
|
||||
* GetThreadStatus() is now able to report compaction activity.
|
||||
* MemEnv (env that stores data in memory) is now available in default library build. You can create it by calling NewMemEnv().
|
||||
* Add SliceTransform.SameResultWhenAppended() to help users determine it is safe to apply prefix bloom/hash.
|
||||
|
||||
### Public API changes
|
||||
* Deprecated skip_log_error_on_recovery option
|
||||
|
5
Makefile
5
Makefile
@ -128,6 +128,7 @@ TESTS = \
|
||||
coding_test \
|
||||
corruption_test \
|
||||
crc32c_test \
|
||||
slice_transform_test \
|
||||
dbformat_test \
|
||||
env_test \
|
||||
fault_injection_test \
|
||||
@ -403,6 +404,10 @@ corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
slice_transform_test: util/slice_transform_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) util/slice_transform_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
|
||||
db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
|
@ -389,28 +389,29 @@ class DBTest {
|
||||
kBlockBasedTableWithPrefixHashIndex = 1,
|
||||
kBlockBasedTableWithWholeKeyHashIndex = 2,
|
||||
kPlainTableFirstBytePrefix = 3,
|
||||
kPlainTableAllBytesPrefix = 4,
|
||||
kVectorRep = 5,
|
||||
kHashLinkList = 6,
|
||||
kHashCuckoo = 7,
|
||||
kMergePut = 8,
|
||||
kFilter = 9,
|
||||
kFullFilter = 10,
|
||||
kUncompressed = 11,
|
||||
kNumLevel_3 = 12,
|
||||
kDBLogDir = 13,
|
||||
kWalDirAndMmapReads = 14,
|
||||
kManifestFileSize = 15,
|
||||
kCompactOnFlush = 16,
|
||||
kPerfOptions = 17,
|
||||
kDeletesFilterFirst = 18,
|
||||
kHashSkipList = 19,
|
||||
kUniversalCompaction = 20,
|
||||
kCompressedBlockCache = 21,
|
||||
kInfiniteMaxOpenFiles = 22,
|
||||
kxxHashChecksum = 23,
|
||||
kFIFOCompaction = 24,
|
||||
kEnd = 25
|
||||
kPlainTableCappedPrefix = 4,
|
||||
kPlainTableAllBytesPrefix = 5,
|
||||
kVectorRep = 6,
|
||||
kHashLinkList = 7,
|
||||
kHashCuckoo = 8,
|
||||
kMergePut = 9,
|
||||
kFilter = 10,
|
||||
kFullFilter = 11,
|
||||
kUncompressed = 12,
|
||||
kNumLevel_3 = 13,
|
||||
kDBLogDir = 14,
|
||||
kWalDirAndMmapReads = 15,
|
||||
kManifestFileSize = 16,
|
||||
kCompactOnFlush = 17,
|
||||
kPerfOptions = 18,
|
||||
kDeletesFilterFirst = 19,
|
||||
kHashSkipList = 20,
|
||||
kUniversalCompaction = 21,
|
||||
kCompressedBlockCache = 22,
|
||||
kInfiniteMaxOpenFiles = 23,
|
||||
kxxHashChecksum = 24,
|
||||
kFIFOCompaction = 25,
|
||||
kEnd = 26
|
||||
};
|
||||
int option_config_;
|
||||
|
||||
@ -483,9 +484,10 @@ class DBTest {
|
||||
option_config_ == kHashSkipList)) {;
|
||||
continue;
|
||||
}
|
||||
if ((skip_mask & kSkipPlainTable)
|
||||
&& (option_config_ == kPlainTableAllBytesPrefix
|
||||
|| option_config_ == kPlainTableFirstBytePrefix)) {
|
||||
if ((skip_mask & kSkipPlainTable) &&
|
||||
(option_config_ == kPlainTableAllBytesPrefix ||
|
||||
option_config_ == kPlainTableFirstBytePrefix ||
|
||||
option_config_ == kPlainTableCappedPrefix)) {
|
||||
continue;
|
||||
}
|
||||
if ((skip_mask & kSkipHashIndex) &&
|
||||
@ -577,6 +579,13 @@ class DBTest {
|
||||
options.max_sequential_skip_in_iterations = 999999;
|
||||
set_block_based_table_factory = false;
|
||||
break;
|
||||
case kPlainTableCappedPrefix:
|
||||
options.table_factory.reset(new PlainTableFactory());
|
||||
options.prefix_extractor.reset(NewCappedPrefixTransform(8));
|
||||
options.allow_mmap_reads = true;
|
||||
options.max_sequential_skip_in_iterations = 999999;
|
||||
set_block_based_table_factory = false;
|
||||
break;
|
||||
case kPlainTableAllBytesPrefix:
|
||||
options.table_factory.reset(new PlainTableFactory());
|
||||
options.prefix_extractor.reset(NewNoopTransform());
|
||||
|
@ -36,10 +36,37 @@ class SliceTransform {
|
||||
|
||||
// determine whether dst=Transform(src) for some src
|
||||
virtual bool InRange(const Slice& dst) const = 0;
|
||||
|
||||
// Transform(s)=Transform(`prefix`) for any s with `prefix` as a prefix.
|
||||
//
|
||||
// This function is not used by RocksDB, but for users. If users pass
|
||||
// Options by string to RocksDB, they might not know what prefix extractor
|
||||
// they are using. This function helps users determine:
|
||||
// if they want to iterate all keys prefixing `prefix`, whether it is
|
||||
// safe to use prefix bloom filter and seek to key `prefix`.
|
||||
// Only returning true indicates it is correct to do that.
|
||||
//
|
||||
// Here is an example: Suppose we implement a slice transform that returns
|
||||
// the first part of the string after splitting it using delimiter ",":
|
||||
// 1. SameResultWhenAppended("abc,") should return true. If applying prefix
|
||||
// bloom filter using it, all slices matching "abc,.*" will be extracted
|
||||
// to "abc,", so any SST file or memtable containing any of those keys
|
||||
// will not be filtered out.
|
||||
// 2. SameResultWhenAppended("abc") should return false. A user is not
|
||||
// guaranteed to see all the keys matching "abc.*" if the user seeks to "abc"
|
||||
// against a DB with the same setting. If one SST file only contains
|
||||
// "abcd,e", the file can be filtered out and the key will be invisible.
|
||||
//
|
||||
// i.e., an implementation always returning false is safe.
|
||||
// Default answer for transforms that do not override this method: always
// false, regardless of `prefix`.
virtual bool SameResultWhenAppended(const Slice& prefix) const {
  const bool same_result = false;
  return same_result;
}
|
||||
};
|
||||
|
||||
extern const SliceTransform* NewFixedPrefixTransform(size_t prefix_len);
|
||||
|
||||
extern const SliceTransform* NewCappedPrefixTransform(size_t cap_len);
|
||||
|
||||
extern const SliceTransform* NewNoopTransform();
|
||||
|
||||
}
|
||||
|
@ -510,14 +510,27 @@ Status GetColumnFamilyOptionsFromMap(
|
||||
} else if (o.first == "inplace_update_support") {
|
||||
new_options->inplace_update_support = ParseBoolean(o.first, o.second);
|
||||
} else if (o.first == "prefix_extractor") {
|
||||
const std::string kName = "fixed:";
|
||||
if (o.second.compare(0, kName.size(), kName) != 0) {
|
||||
return Status::InvalidArgument("Invalid Prefix Extractor type: "
|
||||
+ o.second);
|
||||
}
|
||||
int prefix_length = ParseInt(trim(o.second.substr(kName.size())));
|
||||
const std::string kFixedPrefixName = "fixed:";
|
||||
const std::string kCappedPrefixName = "capped:";
|
||||
auto& pe_value = o.second;
|
||||
if (pe_value.size() > kFixedPrefixName.size() &&
|
||||
pe_value.compare(0, kFixedPrefixName.size(), kFixedPrefixName) ==
|
||||
0) {
|
||||
int prefix_length =
|
||||
ParseInt(trim(o.second.substr(kFixedPrefixName.size())));
|
||||
new_options->prefix_extractor.reset(
|
||||
NewFixedPrefixTransform(prefix_length));
|
||||
} else if (pe_value.size() > kCappedPrefixName.size() &&
|
||||
pe_value.compare(0, kCappedPrefixName.size(),
|
||||
kCappedPrefixName) == 0) {
|
||||
int prefix_length =
|
||||
ParseInt(trim(pe_value.substr(kCappedPrefixName.size())));
|
||||
new_options->prefix_extractor.reset(
|
||||
NewCappedPrefixTransform(prefix_length));
|
||||
} else {
|
||||
return Status::InvalidArgument("Invalid Prefix Extractor type: " +
|
||||
pe_value);
|
||||
}
|
||||
} else {
|
||||
return Status::InvalidArgument("Unrecognized option: " + o.first);
|
||||
}
|
||||
|
@ -330,10 +330,17 @@ TEST(OptionsTest, GetOptionsFromStringTest) {
|
||||
ASSERT_EQ(new_cf_opt.max_write_buffer_number, 16*1024*1024);
|
||||
ASSERT_EQ(new_cf_opt.inplace_update_num_locks, 17*1024UL*1024UL);
|
||||
// Units (g)
|
||||
ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt,
|
||||
"write_buffer_size=18g;arena_block_size=19G", &new_cf_opt));
|
||||
ASSERT_OK(GetColumnFamilyOptionsFromString(
|
||||
base_cf_opt,
|
||||
"write_buffer_size=18g;prefix_extractor=capped:8;"
|
||||
"arena_block_size=19G",
|
||||
&new_cf_opt));
|
||||
ASSERT_EQ(new_cf_opt.write_buffer_size, 18*1024UL*1024UL*1024UL);
|
||||
ASSERT_EQ(new_cf_opt.arena_block_size, 19*1024UL*1024UL*1024UL);
|
||||
ASSERT_TRUE(new_cf_opt.prefix_extractor.get() != nullptr);
|
||||
std::string prefix_name(new_cf_opt.prefix_extractor->Name());
|
||||
ASSERT_EQ(prefix_name, "rocksdb.CappedPrefix.8");
|
||||
|
||||
// Units (t)
|
||||
ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt,
|
||||
"write_buffer_size=20t;arena_block_size=21T", &new_cf_opt));
|
||||
|
@ -39,6 +39,38 @@ class FixedPrefixTransform : public SliceTransform {
|
||||
virtual bool InRange(const Slice& dst) const {
|
||||
return (dst.size() == prefix_len_);
|
||||
}
|
||||
|
||||
virtual bool SameResultWhenAppended(const Slice& prefix) const {
|
||||
return InDomain(prefix);
|
||||
}
|
||||
};
|
||||
|
||||
class CappedPrefixTransform : public SliceTransform {
|
||||
private:
|
||||
size_t cap_len_;
|
||||
std::string name_;
|
||||
|
||||
public:
|
||||
explicit CappedPrefixTransform(size_t cap_len)
|
||||
: cap_len_(cap_len),
|
||||
name_("rocksdb.CappedPrefix." + ToString(cap_len_)) {}
|
||||
|
||||
virtual const char* Name() const { return name_.c_str(); }
|
||||
|
||||
virtual Slice Transform(const Slice& src) const {
|
||||
assert(InDomain(src));
|
||||
return Slice(src.data(), std::min(cap_len_, src.size()));
|
||||
}
|
||||
|
||||
virtual bool InDomain(const Slice& src) const { return true; }
|
||||
|
||||
virtual bool InRange(const Slice& dst) const {
|
||||
return (dst.size() <= cap_len_);
|
||||
}
|
||||
|
||||
virtual bool SameResultWhenAppended(const Slice& prefix) const {
|
||||
return prefix.size() >= cap_len_;
|
||||
}
|
||||
};
|
||||
|
||||
class NoopTransform : public SliceTransform {
|
||||
@ -60,6 +92,10 @@ class NoopTransform : public SliceTransform {
|
||||
virtual bool InRange(const Slice& dst) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Always answers false, whatever `prefix` is.
virtual bool SameResultWhenAppended(const Slice& prefix) const {
  const bool same_result = false;
  return same_result;
}
|
||||
};
|
||||
|
||||
}
|
||||
@ -68,6 +104,10 @@ const SliceTransform* NewFixedPrefixTransform(size_t prefix_len) {
|
||||
return new FixedPrefixTransform(prefix_len);
|
||||
}
|
||||
|
||||
// Factory for the capped prefix extractor. Heap-allocates a
// CappedPrefixTransform limited to `cap_len` bytes; the object is not freed
// here.
const SliceTransform* NewCappedPrefixTransform(size_t cap_len) {
  const SliceTransform* capped = new CappedPrefixTransform(cap_len);
  return capped;
}
|
||||
|
||||
// Factory for the no-op transform. Heap-allocates a NoopTransform; the
// object is not freed here.
const SliceTransform* NewNoopTransform() {
  const SliceTransform* noop = new NoopTransform;
  return noop;
}
|
||||
|
150
util/slice_transform_test.cc
Normal file
150
util/slice_transform_test.cc
Normal file
@ -0,0 +1,150 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "rocksdb/slice_transform.h"
|
||||
|
||||
#include "rocksdb/db.h"
|
||||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/filter_policy.h"
|
||||
#include "rocksdb/statistics.h"
|
||||
#include "rocksdb/table.h"
|
||||
#include "util/testharness.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// Empty fixture type used by the TEST(SliceTransformTest, ...) cases below.
class SliceTransformTest {
};
|
||||
|
||||
// Exercises NewCappedPrefixTransform() with caps below, equal to and above
// the key length, and with a zero cap.
TEST(SliceTransformTest, CapPrefixTransform) {
  const std::string key = "abcdefge";  // 8 bytes long

  unique_ptr<const SliceTransform> transform;

  // Cap shorter than the key: the key is truncated, and
  // SameResultWhenAppended() holds only for inputs of at least 6 bytes.
  transform.reset(NewCappedPrefixTransform(6));
  ASSERT_EQ(transform->Transform(key).ToString(), "abcdef");
  ASSERT_TRUE(transform->SameResultWhenAppended("123456"));
  ASSERT_TRUE(transform->SameResultWhenAppended("1234567"));
  ASSERT_TRUE(!transform->SameResultWhenAppended("12345"));

  // Remaining caps applied to the same key: equal to its length, longer than
  // it, and zero.
  const struct {
    size_t cap_len;
    const char* expected_prefix;
  } key_cases[] = {
      {8, "abcdefge"},
      {10, "abcdefge"},
      {0, ""},
  };
  for (const auto& c : key_cases) {
    transform.reset(NewCappedPrefixTransform(c.cap_len));
    ASSERT_EQ(transform->Transform(key).ToString(), c.expected_prefix);
  }

  // Zero cap applied to an empty key.
  transform.reset(NewCappedPrefixTransform(0));
  ASSERT_EQ(transform->Transform("").ToString(), "");
}
|
||||
|
||||
class SliceTransformDBTest {
|
||||
private:
|
||||
std::string dbname_;
|
||||
Env* env_;
|
||||
DB* db_;
|
||||
|
||||
public:
|
||||
SliceTransformDBTest() : env_(Env::Default()), db_(nullptr) {
|
||||
dbname_ = test::TmpDir() + "/slice_transform_db_test";
|
||||
ASSERT_OK(DestroyDB(dbname_, last_options_));
|
||||
}
|
||||
|
||||
~SliceTransformDBTest() {
|
||||
delete db_;
|
||||
ASSERT_OK(DestroyDB(dbname_, last_options_));
|
||||
}
|
||||
|
||||
DB* db() { return db_; }
|
||||
|
||||
// Return the current option configuration.
|
||||
Options* GetOptions() { return &last_options_; }
|
||||
|
||||
void DestroyAndReopen() {
|
||||
// Destroy using last options
|
||||
Destroy();
|
||||
ASSERT_OK(TryReopen());
|
||||
}
|
||||
|
||||
void Destroy() {
|
||||
delete db_;
|
||||
db_ = nullptr;
|
||||
ASSERT_OK(DestroyDB(dbname_, last_options_));
|
||||
}
|
||||
|
||||
Status TryReopen() {
|
||||
delete db_;
|
||||
db_ = nullptr;
|
||||
last_options_.create_if_missing = true;
|
||||
|
||||
return DB::Open(last_options_, dbname_, &db_);
|
||||
}
|
||||
|
||||
Options last_options_;
|
||||
};
|
||||
|
||||
namespace {
|
||||
uint64_t TestGetTickerCount(const Options& options, Tickers ticker_type) {
|
||||
return options.statistics->getTickerCount(ticker_type);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// End-to-end check of an 8-byte capped prefix extractor on a block-based
// table with a bloom filter and whole-key filtering turned off. After each
// Seek() it checks the iterator state and the BLOOM_FILTER_PREFIX_USEFUL
// ticker.
TEST(SliceTransformDBTest, CapPrefix) {
  last_options_.prefix_extractor.reset(NewCappedPrefixTransform(8));
  last_options_.statistics = rocksdb::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  table_options.whole_key_filtering = false;
  last_options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ASSERT_OK(TryReopen());

  ReadOptions read_opts;
  FlushOptions flush_opts;
  WriteOptions write_opts;

  // Two keys longer than the cap and two shorter than it, then a flush.
  ASSERT_OK(db()->Put(write_opts, "barbarbar", "foo"));
  ASSERT_OK(db()->Put(write_opts, "barbarbar2", "foo2"));
  ASSERT_OK(db()->Put(write_opts, "foo", "bar"));
  ASSERT_OK(db()->Put(write_opts, "foo3", "bar3"));
  ASSERT_OK(db()->Flush(flush_opts));

  unique_ptr<Iterator> iter(db()->NewIterator(read_opts));

  struct SeekCase {
    const char* target;          // key handed to Seek()
    const char* expected_value;  // value at the iterator; nullptr = invalid
    uint64_t useful_count;       // expected ticker value after this seek
  };
  const SeekCase seek_cases[] = {
      {"foo", "bar", 0U},
      {"foo2", nullptr, 1U},
      {"barbarbar", "foo", 1U},
      {"barfoofoo", nullptr, 2U},
      {"foobarbar", nullptr, 3U},
  };

  for (const SeekCase& c : seek_cases) {
    iter->Seek(c.target);
    ASSERT_OK(iter->status());
    if (c.expected_value != nullptr) {
      ASSERT_TRUE(iter->Valid());
      ASSERT_EQ(iter->value().ToString(), c.expected_value);
    } else {
      ASSERT_TRUE(!iter->Valid());
    }
    ASSERT_EQ(TestGetTickerCount(last_options_, BLOOM_FILTER_PREFIX_USEFUL),
              c.useful_count);
  }
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
// Entry point of the test binary. Runs every registered test; the
// command-line arguments are not used.
int main(int argc, char** argv) {
  const int exit_code = rocksdb::test::RunAllTests();
  return exit_code;
}
|
Loading…
Reference in New Issue
Block a user