Add the index/filter block cache
Summary: This diff leverages the existing block cache and extends it to also cache index/filter blocks.

Test Plan: Added new tests in db_test and table_test. Correctness is checked by:
1. make check
2. make valgrind_check

Performance is tested by:
1. 10 runs of build_tools/regression_build_test.sh on two versions of rocksdb, before and after the code change. The results suggest no significant difference between them; for the two key operations `overwrite` and `readrandom`, the average iops are ~20k and ~260k respectively, with very small variance.
2. db_stress

Reviewers: dhruba

Reviewed By: dhruba

CC: leveldb, haobo, xjin

Differential Revision: https://reviews.facebook.net/D13167
parent aed9f1fa5e
commit 88ba331c1a
db/db_test.cc (108 lines changed)
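Before the diff itself, a minimal sketch of how an application could observe the new tickers this change introduces. This is an illustration, not part of the commit; it assumes the era's API as used in the tests below (Options::block_cache, Options::statistics, CreateDBStatistics() from db/db_statistics.h, getTickerCount(), and a public DB::Flush()):

#include <cassert>

#include "db/db_statistics.h"  // CreateDBStatistics() lives here in this era
#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

// Open a DB with a block cache plus statistics, flush one key, and check
// that the new table's index block went through the block cache.
int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.block_cache = rocksdb::NewLRUCache(8 * 1024 * 1024);  // 8MB cache
  options.statistics = rocksdb::CreateDBStatistics();

  rocksdb::DB* db = nullptr;
  assert(rocksdb::DB::Open(options, "/tmp/ticker_demo", &db).ok());

  db->Put(rocksdb::WriteOptions(), "key", "val");
  db->Flush(rocksdb::FlushOptions());  // new table => index/filter blocks cached

  // Creating one table should record at least one index-block cache miss.
  assert(options.statistics->getTickerCount(rocksdb::BLOCK_CACHE_INDEX_MISS) >= 1);

  delete db;
  return 0;
}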
@@ -698,6 +698,63 @@ TEST(DBTest, ReadWrite) {
   } while (ChangeOptions());
 }
 
+// Make sure that when options.block_cache is set, after a new table is
+// created its index/filter blocks are added to block cache.
+TEST(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
+  Options options = CurrentOptions();
+  std::unique_ptr<const FilterPolicy> filter_policy(NewBloomFilterPolicy(20));
+  options.filter_policy = filter_policy.get();
+  options.create_if_missing = true;
+  options.statistics = rocksdb::CreateDBStatistics();
+  DestroyAndReopen(&options);
+
+  ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
+  // Create a new table.
+  dbfull()->Flush(FlushOptions());
+
+  // index/filter blocks added to block cache right after table creation.
+  ASSERT_EQ(1,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_INDEX_MISS));
+  ASSERT_EQ(1,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(2, /* only index/filter were added */
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
+  ASSERT_EQ(0,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_DATA_MISS));
+
+  // Make sure filter block is in cache.
+  std::string value;
+  ReadOptions ropt;
+  db_->KeyMayExist(ReadOptions(), "key", &value);
+
+  // Miss count should remain the same.
+  ASSERT_EQ(1,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(1,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT));
+
+  db_->KeyMayExist(ReadOptions(), "key", &value);
+  ASSERT_EQ(1,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(2,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT));
+
+  // Make sure index block is in cache.
+  auto index_block_hit =
+      options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT);
+  value = Get("key");
+  ASSERT_EQ(1,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(index_block_hit + 1,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT));
+
+  value = Get("key");
+  ASSERT_EQ(1,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
+  ASSERT_EQ(index_block_hit + 2,
+            options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT));
+}
+
 static std::string Key(int i) {
   char buf[100];
   snprintf(buf, sizeof(buf), "key%06d", i);
@@ -768,6 +825,7 @@ TEST(DBTest, PutDeleteGet) {
   } while (ChangeOptions());
 }
 
+
 TEST(DBTest, GetFromImmutableLayer) {
   do {
     Options options = CurrentOptions();
@@ -917,43 +975,46 @@ TEST(DBTest, KeyMayExist) {
     value.clear();
 
     long numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
-    long cache_miss =
-      options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS);
+    long cache_added =
+      options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
     ASSERT_TRUE(db_->KeyMayExist(ropts, "a", &value, &value_found));
     ASSERT_TRUE(!value_found);
     // assert that no new files were opened and no new blocks were
     // read into block cache.
     ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
-    ASSERT_EQ(cache_miss,
-              options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS));
+    ASSERT_EQ(cache_added,
+              options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
 
     ASSERT_OK(db_->Delete(WriteOptions(), "a"));
 
     numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
-    cache_miss = options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS);
+    cache_added =
+      options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
     ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value));
     ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
-    ASSERT_EQ(cache_miss,
-              options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS));
+    ASSERT_EQ(cache_added,
+              options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
 
     dbfull()->Flush(FlushOptions());
     dbfull()->CompactRange(nullptr, nullptr);
 
     numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
-    cache_miss = options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS);
+    cache_added =
+      options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
     ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value));
     ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
-    ASSERT_EQ(cache_miss,
-              options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS));
+    ASSERT_EQ(cache_added,
+              options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
 
     ASSERT_OK(db_->Delete(WriteOptions(), "c"));
 
     numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
-    cache_miss = options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS);
+    cache_added =
+      options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
     ASSERT_TRUE(!db_->KeyMayExist(ropts, "c", &value));
     ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
-    ASSERT_EQ(cache_miss,
-              options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS));
+    ASSERT_EQ(cache_added,
+              options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
 
     delete options.filter_policy;
   } while (ChangeOptions());
@@ -987,8 +1048,8 @@ TEST(DBTest, NonBlockingIteration) {
     // verify that a non-blocking iterator does not find any
    // kvs. Neither does it do any IOs to storage.
     long numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
-    long cache_miss =
-      options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS);
+    long cache_added =
+      options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
     iter = db_->NewIterator(non_blocking_opts);
     count = 0;
     for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
@@ -997,8 +1058,8 @@ TEST(DBTest, NonBlockingIteration) {
     ASSERT_EQ(count, 0);
     ASSERT_TRUE(iter->status().IsIncomplete());
     ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
-    ASSERT_EQ(cache_miss,
-              options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS));
+    ASSERT_EQ(cache_added,
+              options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
     delete iter;
 
     // read in the specified block via a regular get
@@ -1006,7 +1067,8 @@ TEST(DBTest, NonBlockingIteration) {
 
     // verify that we can find it via a non-blocking scan
     numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
-    cache_miss = options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS);
+    cache_added =
+      options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
     iter = db_->NewIterator(non_blocking_opts);
     count = 0;
     for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
@@ -1015,8 +1077,8 @@ TEST(DBTest, NonBlockingIteration) {
     }
     ASSERT_EQ(count, 1);
     ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
-    ASSERT_EQ(cache_miss,
-              options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS));
+    ASSERT_EQ(cache_added,
+              options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
     delete iter;
 
   } while (ChangeOptions());
@@ -3534,7 +3596,7 @@ TEST(DBTest, BloomFilter) {
   env_->count_random_reads_ = true;
   Options options = CurrentOptions();
   options.env = env_;
-  options.block_cache = NewLRUCache(0);  // Prevent cache hits
+  options.no_block_cache = true;
   options.filter_policy = NewBloomFilterPolicy(10);
   Reopen(&options);
 
@@ -4128,7 +4190,7 @@ TEST(DBTest, ReadCompaction) {
   options.write_buffer_size = 64 * 1024;
   options.filter_policy = nullptr;
   options.block_size = 4096;
-  options.block_cache = NewLRUCache(0);  // Prevent cache hits
+  options.no_block_cache = true;
 
   Reopen(&options);
 
@@ -4708,7 +4770,7 @@ TEST(DBTest, PrefixScan) {
   env_->count_random_reads_ = true;
   Options options = CurrentOptions();
   options.env = env_;
-  options.block_cache = NewLRUCache(0);  // Prevent cache hits
+  options.no_block_cache = true;
   options.filter_policy = NewBloomFilterPolicy(10);
   options.prefix_extractor = prefix_extractor;
   options.whole_key_filtering = false;
@@ -653,7 +653,8 @@ struct ReadOptions {
   // Default: false
   bool verify_checksums;
 
-  // Should the data read for this iteration be cached in memory?
+  // Should the "data block"/"index block"/"filter block" read for this
+  // iteration be cached in memory?
   // Callers may wish to set this field to false for bulk scans.
   // Default: true
   bool fill_cache;
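A usage note on the widened fill_cache semantics (an illustration, not part of the commit): a bulk scan can now keep index and filter blocks, as well as data blocks, from churning the block cache. A minimal sketch, assuming an already-open rocksdb::DB*:

#include <cassert>
#include <memory>

#include "rocksdb/db.h"

// Scan the whole DB without inserting any of the blocks it reads
// (data, index, or filter) into options.block_cache.
void ScanWithoutCaching(rocksdb::DB* db) {
  rocksdb::ReadOptions ropts;
  ropts.fill_cache = false;  // after this diff, covers index/filter blocks too
  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(ropts));
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // process it->key() / it->value()
  }
  assert(it->status().ok());  // always check for read errors after a scan
}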
@@ -23,9 +23,32 @@ namespace rocksdb {
  * And incrementing TICKER_ENUM_MAX.
  */
 enum Tickers {
+  // total block cache misses
+  // REQUIRES: BLOCK_CACHE_MISS == BLOCK_CACHE_INDEX_MISS +
+  //                               BLOCK_CACHE_FILTER_MISS +
+  //                               BLOCK_CACHE_DATA_MISS;
   BLOCK_CACHE_MISS,
+  // total block cache hit
+  // REQUIRES: BLOCK_CACHE_HIT == BLOCK_CACHE_INDEX_HIT +
+  //                              BLOCK_CACHE_FILTER_HIT +
+  //                              BLOCK_CACHE_DATA_HIT;
   BLOCK_CACHE_HIT,
-  BLOOM_FILTER_USEFUL, // no. of times bloom filter has avoided file reads.
+  // # of blocks added to block cache.
+  BLOCK_CACHE_ADD,
+  // # of times cache miss when accessing index block from block cache.
+  BLOCK_CACHE_INDEX_MISS,
+  // # of times cache hit when accessing index block from block cache.
+  BLOCK_CACHE_INDEX_HIT,
+  // # of times cache miss when accessing filter block from block cache.
+  BLOCK_CACHE_FILTER_MISS,
+  // # of times cache hit when accessing filter block from block cache.
+  BLOCK_CACHE_FILTER_HIT,
+  // # of times cache miss when accessing data block from block cache.
+  BLOCK_CACHE_DATA_MISS,
+  // # of times cache hit when accessing data block from block cache.
+  BLOCK_CACHE_DATA_HIT,
+  // # of times bloom filter has avoided file reads.
+  BLOOM_FILTER_USEFUL,
 
   /**
    * COMPACTION_KEY_DROP_* count the reasons for key drop during compaction
@@ -93,6 +116,13 @@ enum Tickers {
 const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
   { BLOCK_CACHE_MISS, "rocksdb.block.cache.miss" },
   { BLOCK_CACHE_HIT, "rocksdb.block.cache.hit" },
+  { BLOCK_CACHE_ADD, "rocksdb.block.cache.add" },
+  { BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss" },
+  { BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit" },
+  { BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss" },
+  { BLOCK_CACHE_FILTER_HIT, "rocksdb.block.cache.filter.hit" },
+  { BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss" },
+  { BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit" },
   { BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful" },
   { COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new" },
   { COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete" },
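The REQUIRES invariants above are documented rather than enforced, so a caller can sanity-check them at runtime. A hedged sketch (not part of the commit) that dumps every registered ticker via TickersNameMap and asserts the miss invariant:

#include <cassert>
#include <cstdio>

#include "rocksdb/statistics.h"

// Print each ticker under its registered name, then verify that total block
// cache misses equal the sum of the per-block-type misses.
void DumpBlockCacheTickers(rocksdb::Statistics* stats) {
  for (const auto& ticker : rocksdb::TickersNameMap) {
    std::printf("%-40s: %ld\n", ticker.second.c_str(),
                stats->getTickerCount(ticker.first));
  }
  long total = stats->getTickerCount(rocksdb::BLOCK_CACHE_MISS);
  long parts = stats->getTickerCount(rocksdb::BLOCK_CACHE_INDEX_MISS) +
               stats->getTickerCount(rocksdb::BLOCK_CACHE_FILTER_MISS) +
               stats->getTickerCount(rocksdb::BLOCK_CACHE_DATA_MISS);
  assert(total == parts);  // the REQUIRES invariant from statistics.h
}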
[File diff suppressed because it is too large]
@@ -13,6 +13,7 @@
 #include "rocksdb/cache.h"
 #include "rocksdb/env.h"
 #include "rocksdb/iterator.h"
+#include "rocksdb/statistics.h"
 #include "rocksdb/table_stats.h"
 #include "rocksdb/table.h"
 #include "util/coding.h"
@@ -27,6 +28,7 @@ class RandomAccessFile;
 struct ReadOptions;
 class TableCache;
 class TableReader;
+class FilterBlockReader;
 
 using std::unique_ptr;
 
@@ -91,6 +93,9 @@ class BlockBasedTable : public TableReader {
   ~BlockBasedTable();
 
  private:
+  template <class TValue>
+  struct CachableEntry;
+
   struct Rep;
   Rep* rep_;
   bool compaction_optimized_;
@@ -98,9 +103,37 @@ class BlockBasedTable : public TableReader {
   static Iterator* BlockReader(void*, const ReadOptions&,
                                const EnvOptions& soptions, const Slice&,
                                bool for_compaction);
 
   static Iterator* BlockReader(void*, const ReadOptions&, const Slice&,
                                bool* didIO, bool for_compaction = false);
 
+  // if `no_io == true`, we will not try to read filter from sst file
+  // if it is not cached yet.
+  CachableEntry<FilterBlockReader> GetFilter(bool no_io = false) const;
+
+  Iterator* IndexBlockReader(const ReadOptions& options) const;
+
+  // Read the block, either from sst file or from cache. This method will try
+  // to read from cache only when block_cache is set or ReadOptions doesn't
+  // explicitly prohibit storage IO.
+  //
+  // If the block is read from cache, the statistics for cache miss/hit of
+  // the given type of block will be updated. User can specify
+  // `block_cache_miss_ticker` and `block_cache_hit_ticker` for the statistics
+  // update.
+  //
+  // On success, the `result` parameter will be populated, which contains a
+  // pointer to the block and its cache handle, which will be nullptr if it's
+  // not read from the cache.
+  static Status GetBlock(const BlockBasedTable* table,
+                         const BlockHandle& handle,
+                         const ReadOptions& options,
+                         bool for_compaction,
+                         Tickers block_cache_miss_ticker,
+                         Tickers block_cache_hit_ticker,
+                         bool* didIO,
+                         CachableEntry<Block>* result);
+
   // Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found
   // after a call to Seek(key), until handle_result returns false.
   // May not make such a call if filter policy says that key is not present.
@@ -111,6 +144,22 @@ class BlockBasedTable : public TableReader {
   void ReadFilter(const Slice& filter_handle_value);
   static Status ReadStats(const Slice& handle_value, Rep* rep);
 
+  // Read the meta block from sst.
+  static Status ReadMetaBlock(
+      Rep* rep,
+      std::unique_ptr<Block>* meta_block,
+      std::unique_ptr<Iterator>* iter);
+
+  // Create the filter from the filter block.
+  static FilterBlockReader* ReadFilter(
+      const Slice& filter_handle_value,
+      Rep* rep,
+      size_t* filter_size = nullptr);
+
+  // Read the table stats from stats block.
+  static Status ReadStats(
+      const Slice& handle_value, Rep* rep, TableStats* stats);
+
   static void SetupCacheKeyPrefix(Rep* rep);
 
   explicit BlockBasedTable(Rep* rep) :
@@ -127,7 +127,8 @@ void FilterBlockBuilder::GenerateFilter() {
   start_.clear();
 }
 
-FilterBlockReader::FilterBlockReader(const Options& opt, const Slice& contents)
+FilterBlockReader::FilterBlockReader(
+    const Options& opt, const Slice& contents, bool delete_contents_after_use)
     : policy_(opt.filter_policy),
       prefix_extractor_(opt.prefix_extractor),
       whole_key_filtering_(opt.whole_key_filtering),
@@ -143,6 +144,9 @@ FilterBlockReader::FilterBlockReader(const Options& opt, const Slice& contents)
   data_ = contents.data();
   offset_ = data_ + last_word;
   num_ = (n - 5 - last_word) / 4;
+  if (delete_contents_after_use) {
+    filter_data.reset(contents.data());
+  }
 }
 
 bool FilterBlockReader::KeyMayMatch(uint64_t block_offset,
@@ -12,6 +12,8 @@
 // into a single filter block.
 
 #pragma once
+
+#include <memory>
 #include <stddef.h>
 #include <stdint.h>
 #include <string>
@@ -62,7 +64,10 @@ class FilterBlockBuilder {
 class FilterBlockReader {
  public:
   // REQUIRES: "contents" and *policy must stay live while *this is live.
-  FilterBlockReader(const Options& opt, const Slice& contents);
+  FilterBlockReader(
+      const Options& opt,
+      const Slice& contents,
+      bool delete_contents_after_use = false);
   bool KeyMayMatch(uint64_t block_offset, const Slice& key);
   bool PrefixMayMatch(uint64_t block_offset, const Slice& prefix);
 
@@ -74,6 +79,8 @@ class FilterBlockReader {
   const char* offset_;  // Pointer to beginning of offset array (at block-end)
   size_t num_;          // Number of entries in offset array
   size_t base_lg_;      // Encoding parameter (see kFilterBaseLg in .cc file)
+  std::unique_ptr<const char[]> filter_data;
+
 
   bool MayMatch(uint64_t block_offset, const Slice& entry);
 };
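To make the new ownership flag concrete, a sketch of the two construction modes (internal API; the constructor signature is taken from this diff, everything else here is hypothetical):

#include <cstddef>

#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "table/filter_block.h"

// Default mode (unchanged): the caller owns `contents` and must keep the
// bytes alive for as long as the reader lives.
rocksdb::FilterBlockReader* MakeBorrowingReader(const rocksdb::Options& opt,
                                                const rocksdb::Slice& contents) {
  return new rocksdb::FilterBlockReader(opt, contents);
}

// New mode: `buf` holds a filter block read from the sst file into a
// new[]-allocated buffer; passing delete_contents_after_use = true hands
// ownership to the reader, which frees the buffer through its
// std::unique_ptr<const char[]> member on destruction.
rocksdb::FilterBlockReader* MakeOwningReader(const rocksdb::Options& opt,
                                             const char* buf, size_t n) {
  return new rocksdb::FilterBlockReader(
      opt, rocksdb::Slice(buf, n), true /* delete_contents_after_use */);
}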
@@ -12,18 +12,19 @@
 #include <vector>
 
 #include "db/dbformat.h"
+#include "db/db_statistics.h"
 #include "db/memtable.h"
 #include "db/write_batch_internal.h"
+#include "rocksdb/cache.h"
 #include "rocksdb/db.h"
 #include "rocksdb/env.h"
 #include "rocksdb/iterator.h"
-#include "rocksdb/table.h"
 #include "rocksdb/memtablerep.h"
-#include "table/block.h"
-#include "table/block_builder.h"
-#include "table/format.h"
-#include "table/block_based_table_reader.h"
 #include "table/block_based_table_builder.h"
+#include "table/block_based_table_reader.h"
+#include "table/block_builder.h"
+#include "table/block.h"
+#include "table/format.h"
 #include "util/random.h"
 #include "util/testharness.h"
 #include "util/testutil.h"
@@ -486,8 +487,7 @@ struct TestArgs {
 };
 
 
-static std::vector<TestArgs> Generate_Arg_List()
-{
+static std::vector<TestArgs> Generate_Arg_List() {
   std::vector<TestArgs> ret;
   TestType test_type[4] = {TABLE_TEST, BLOCK_TEST, MEMTABLE_TEST, DB_TEST};
   int test_type_len = 4;
@@ -928,6 +928,181 @@ TEST(TableTest, NumBlockStat) {
   );
 }
 
+class BlockCacheStats {
+ public:
+  explicit BlockCacheStats(std::shared_ptr<Statistics> statistics) {
+    block_cache_miss =
+      statistics.get()->getTickerCount(BLOCK_CACHE_MISS);
+    block_cache_hit =
+      statistics.get()->getTickerCount(BLOCK_CACHE_HIT);
+    index_block_cache_miss =
+      statistics.get()->getTickerCount(BLOCK_CACHE_INDEX_MISS);
+    index_block_cache_hit =
+      statistics.get()->getTickerCount(BLOCK_CACHE_INDEX_HIT);
+    data_block_cache_miss =
+      statistics.get()->getTickerCount(BLOCK_CACHE_DATA_MISS);
+    data_block_cache_hit =
+      statistics.get()->getTickerCount(BLOCK_CACHE_DATA_HIT);
+  }
+
+  // Check if the fetched stats match the expected ones.
+  void AssertEqual(
+      long index_block_cache_miss,
+      long index_block_cache_hit,
+      long data_block_cache_miss,
+      long data_block_cache_hit) const {
+    ASSERT_EQ(index_block_cache_miss, this->index_block_cache_miss);
+    ASSERT_EQ(index_block_cache_hit, this->index_block_cache_hit);
+    ASSERT_EQ(data_block_cache_miss, this->data_block_cache_miss);
+    ASSERT_EQ(data_block_cache_hit, this->data_block_cache_hit);
+    ASSERT_EQ(
+        index_block_cache_miss + data_block_cache_miss,
+        this->block_cache_miss
+    );
+    ASSERT_EQ(
+        index_block_cache_hit + data_block_cache_hit,
+        this->block_cache_hit
+    );
+  }
+
+ private:
+  long block_cache_miss = 0;
+  long block_cache_hit = 0;
+  long index_block_cache_miss = 0;
+  long index_block_cache_hit = 0;
+  long data_block_cache_miss = 0;
+  long data_block_cache_hit = 0;
+};
+
+TEST(TableTest, BlockCacheTest) {
+  // -- Table construction
+  Options options;
+  options.create_if_missing = true;
+  options.statistics = CreateDBStatistics();
+  options.block_cache = NewLRUCache(1024);
+  std::vector<std::string> keys;
+  KVMap kvmap;
+
+  BlockBasedTableConstructor c(BytewiseComparator());
+  c.Add("key", "value");
+  c.Finish(options, &keys, &kvmap);
+
+  // -- PART 1: Open with regular block cache.
+  // With block_cache set, the cache activities below are expected.
+  unique_ptr<Iterator> iter;
+
+  // At first, no block will be accessed.
+  {
+    BlockCacheStats stats(options.statistics);
+    // index will be added to block cache.
+    stats.AssertEqual(
+        1,  // index block miss
+        0,
+        0,
+        0
+    );
+  }
+
+  // Only index block will be accessed
+  {
+    iter.reset(c.NewIterator());
+    BlockCacheStats stats(options.statistics);
+    // NOTE: to help better highlight the "delta" of each ticker, I use
+    // <last_value> + <added_value> to indicate the increment of changed
+    // value; other numbers remain the same.
+    stats.AssertEqual(
+        1,
+        0 + 1,  // index block hit
+        0,
+        0
+    );
+  }
+
+  // Only data block will be accessed
+  {
+    iter->SeekToFirst();
+    BlockCacheStats stats(options.statistics);
+    stats.AssertEqual(
+        1,
+        1,
+        0 + 1,  // data block miss
+        0
+    );
+  }
+
+  // Data block will be in cache
+  {
+    iter.reset(c.NewIterator());
+    iter->SeekToFirst();
+    BlockCacheStats stats(options.statistics);
+    stats.AssertEqual(
+        1,
+        1 + 1,  // index block hit
+        1,
+        0 + 1   // data block hit
+    );
+  }
+  // release the iterator so that the block cache can reset correctly.
+  iter.reset();
+
+  // -- PART 2: Open without block cache
+  options.block_cache.reset();
+  options.statistics = CreateDBStatistics();  // reset the stats
+  c.Reopen(options);
+
+  {
+    iter.reset(c.NewIterator());
+    iter->SeekToFirst();
+    ASSERT_EQ("key", iter->key().ToString());
+    BlockCacheStats stats(options.statistics);
+    // Nothing is affected at all
+    stats.AssertEqual(0, 0, 0, 0);
+  }
+
+  // -- PART 3: Open with very small block cache
+  // In this test, no block will ever get hit since the block cache is
+  // too small to fit even one entry.
+  options.block_cache = NewLRUCache(1);
+  c.Reopen(options);
+  {
+    BlockCacheStats stats(options.statistics);
+    stats.AssertEqual(
+        1,  // index block miss
+        0,
+        0,
+        0
+    );
+  }
+
+  {
+    // Both index and data block get accessed.
+    // It first caches the index block, then the data block. But since the
+    // cache size is only 1, the index block will be purged after the data
+    // block is inserted.
+    iter.reset(c.NewIterator());
+    BlockCacheStats stats(options.statistics);
+    stats.AssertEqual(
+        1 + 1,  // index block miss
+        0,
+        0,  // data block miss
+        0
+    );
+  }
+
+  {
+    // SeekToFirst() accesses the data block. For a similar reason, we expect
+    // a data block cache miss.
+    iter->SeekToFirst();
+    BlockCacheStats stats(options.statistics);
+    stats.AssertEqual(
+        2,
+        0,
+        0 + 1,  // data block miss
+        0
+    );
+  }
+}
+
 TEST(TableTest, ApproximateOffsetOfPlain) {
   BlockBasedTableConstructor c(BytewiseComparator());
   c.Add("k01", "hello");
@@ -1285,11 +1285,11 @@ class StressTest {
       ttl_state = NumberToString(FLAGS_ttl);
     }
     fprintf(stdout, "Time to live(sec) : %s\n", ttl_state.c_str());
-    fprintf(stdout, "Read percentage : %d\n", FLAGS_readpercent);
-    fprintf(stdout, "Prefix percentage : %d\n", FLAGS_prefixpercent);
-    fprintf(stdout, "Write percentage : %d\n", FLAGS_writepercent);
-    fprintf(stdout, "Delete percentage : %d\n", FLAGS_delpercent);
-    fprintf(stdout, "Iterate percentage : %d\n", FLAGS_iterpercent);
+    fprintf(stdout, "Read percentage : %d%%\n", FLAGS_readpercent);
+    fprintf(stdout, "Prefix percentage : %d%%\n", FLAGS_prefixpercent);
+    fprintf(stdout, "Write percentage : %d%%\n", FLAGS_writepercent);
+    fprintf(stdout, "Delete percentage : %d%%\n", FLAGS_delpercent);
+    fprintf(stdout, "Iterate percentage : %d%%\n", FLAGS_iterpercent);
     fprintf(stdout, "Write-buffer-size : %d\n", FLAGS_write_buffer_size);
     fprintf(stdout, "Iterations : %lu\n", FLAGS_num_iterations);
     fprintf(stdout, "Max key : %ld\n", FLAGS_max_key);