Add per-level compression ratio property

Summary:
This is needed so we can measure compression ratio improvements
achieved by D52287.

The property compares raw data size against the total file size for a given
level. If the level is empty it should return 0.0.

Test Plan: new unit test

Reviewers: IslamAbdelRahman, yhchiang, sdong

Reviewed By: sdong

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D56967
This commit is contained in:
Andrew Kryczka 2016-04-20 18:46:54 -07:00
parent ee221d2de0
commit 73a847ef89
9 changed files with 106 additions and 0 deletions

View File

@ -2,6 +2,7 @@
## Unreleased
### Public API Change
* Delete deprecated classes for creating backups (BackupableDB) and restoring from backups (RestoreBackupableDB). Now, BackupEngine should be used for creating backups, and BackupEngineReadOnly should be used for restorations. For more details, see https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F
* Expose estimate of per-level compression ratio via DB property: "rocksdb.num-files-at-levelN".
## 4.7.0 (4/8/2016)
### Public API Change

View File

@ -19,6 +19,7 @@
#include "rocksdb/perf_level.h"
#include "rocksdb/table.h"
#include "util/random.h"
#include "util/string_util.h"
namespace rocksdb {
@ -898,6 +899,48 @@ TEST_F(DBPropertiesTest, EstimatePendingCompBytes) {
"rocksdb.estimate-pending-compaction-bytes", &int_num));
ASSERT_EQ(int_num, 0U);
}
TEST_F(DBPropertiesTest, EstimateCompressionRatio) {
if (!Snappy_Supported()) {
return;
}
const int kNumL0Files = 3;
const int kNumEntriesPerFile = 1000;
Options options = CurrentOptions();
options.compression_per_level = {kNoCompression, kSnappyCompression};
options.disable_auto_compactions = true;
options.max_background_flushes = 0;
options.num_levels = 2;
Reopen(options);
// compression ratio is -1.0 when no open files at level
ASSERT_EQ(CompressionRatioAtLevel(0), -1.0);
const std::string kVal(100, 'a');
for (int i = 0; i < kNumL0Files; ++i) {
for (int j = 0; j < kNumEntriesPerFile; ++j) {
// Put common data ("key") at end to prevent delta encoding from
// compressing the key effectively
std::string key = ToString(i) + ToString(j) + "key";
ASSERT_OK(dbfull()->Put(WriteOptions(), key, kVal));
}
Flush();
}
// no compression at L0, so ratio is less than one
ASSERT_LT(CompressionRatioAtLevel(0), 1.0);
ASSERT_GT(CompressionRatioAtLevel(0), 0.0);
ASSERT_EQ(CompressionRatioAtLevel(1), -1.0);
dbfull()->TEST_CompactRange(0, nullptr, nullptr);
ASSERT_EQ(CompressionRatioAtLevel(0), -1.0);
// Data at L1 should be highly compressed thanks to Snappy and redundant data
// in values (ratio is 12.846 as of 4/19/2016).
ASSERT_GT(CompressionRatioAtLevel(1), 10.0);
}
#endif // ROCKSDB_LITE
class CountingUserTblPropCollector : public TablePropertiesCollector {

View File

@ -714,6 +714,22 @@ int DBTestBase::NumTableFilesAtLevel(int level, int cf) {
return atoi(property.c_str());
}
double DBTestBase::CompressionRatioAtLevel(int level, int cf) {
std::string property;
if (cf == 0) {
// default cfd
EXPECT_TRUE(db_->GetProperty(
"rocksdb.compression-ratio-at-level" + NumberToString(level),
&property));
} else {
EXPECT_TRUE(db_->GetProperty(
handles_[cf],
"rocksdb.compression-ratio-at-level" + NumberToString(level),
&property));
}
return std::stod(property);
}
int DBTestBase::TotalTableFiles(int cf, int levels) {
if (levels == -1) {
levels = CurrentOptions().num_levels;

View File

@ -700,6 +700,8 @@ class DBTestBase : public testing::Test {
int NumTableFilesAtLevel(int level, int cf = 0);
double CompressionRatioAtLevel(int level, int cf = 0);
int TotalTableFiles(int cf = 0, int levels = -1);
// Return spread of files per level

View File

@ -101,6 +101,8 @@ std::pair<Slice, Slice> GetPropertyNameAndArg(const Slice& property) {
static const std::string rocksdb_prefix = "rocksdb.";
static const std::string num_files_at_level_prefix = "num-files-at-level";
static const std::string compression_ratio_at_level_prefix =
"compression-ratio-at-level";
static const std::string allstats = "stats";
static const std::string sstables = "sstables";
static const std::string cfstats = "cfstats";
@ -148,6 +150,8 @@ static const std::string num_running_flushes = "num-running-flushes";
const std::string DB::Properties::kNumFilesAtLevelPrefix =
rocksdb_prefix + num_files_at_level_prefix;
const std::string DB::Properties::kCompressionRatioAtLevelPrefix =
rocksdb_prefix + compression_ratio_at_level_prefix;
const std::string DB::Properties::kStats = rocksdb_prefix + allstats;
const std::string DB::Properties::kSSTables = rocksdb_prefix + sstables;
const std::string DB::Properties::kCFStats = rocksdb_prefix + cfstats;
@ -211,6 +215,8 @@ const std::unordered_map<std::string,
DBPropertyInfo> InternalStats::ppt_name_to_info = {
{DB::Properties::kNumFilesAtLevelPrefix,
{false, &InternalStats::HandleNumFilesAtLevel, nullptr}},
{DB::Properties::kCompressionRatioAtLevelPrefix,
{false, &InternalStats::HandleCompressionRatioAtLevelPrefix, nullptr}},
{DB::Properties::kLevelStats,
{false, &InternalStats::HandleLevelStats, nullptr}},
{DB::Properties::kStats, {false, &InternalStats::HandleStats, nullptr}},
@ -324,6 +330,19 @@ bool InternalStats::HandleNumFilesAtLevel(std::string* value, Slice suffix) {
}
}
bool InternalStats::HandleCompressionRatioAtLevelPrefix(std::string* value,
Slice suffix) {
uint64_t level;
const auto* vstorage = cfd_->current()->storage_info();
bool ok = ConsumeDecimalNumber(&suffix, &level) && suffix.empty();
if (!ok || level >= static_cast<uint64_t>(number_levels_)) {
return false;
}
*value = ToString(
vstorage->GetEstimatedCompressionRatioAtLevel(static_cast<int>(level)));
return true;
}
bool InternalStats::HandleLevelStats(std::string* value, Slice suffix) {
char buf[1000];
const auto* vstorage = cfd_->current()->storage_info();

View File

@ -294,6 +294,7 @@ class InternalStats {
// Handler functions for getting property values. They use "value" as a value-
// result argument, and return true upon successfully setting "value".
bool HandleNumFilesAtLevel(std::string* value, Slice suffix);
bool HandleCompressionRatioAtLevelPrefix(std::string* value, Slice suffix);
bool HandleLevelStats(std::string* value, Slice suffix);
bool HandleStats(std::string* value, Slice suffix);
bool HandleCFStats(std::string* value, Slice suffix);

View File

@ -779,6 +779,21 @@ uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const {
}
}
double VersionStorageInfo::GetEstimatedCompressionRatioAtLevel(
int level) const {
assert(level < num_levels_);
uint64_t sum_file_size_bytes = 0;
uint64_t sum_data_size_bytes = 0;
for (auto* file_meta : files_[level]) {
sum_file_size_bytes += file_meta->fd.GetFileSize();
sum_data_size_bytes += file_meta->raw_key_size + file_meta->raw_value_size;
}
if (sum_file_size_bytes == 0) {
return -1.0;
}
return static_cast<double>(sum_data_size_bytes) / sum_file_size_bytes;
}
void Version::AddIterators(const ReadOptions& read_options,
const EnvOptions& soptions,
MergeIteratorBuilder* merge_iter_builder) {

View File

@ -301,6 +301,8 @@ class VersionStorageInfo {
uint64_t GetEstimatedActiveKeys() const;
double GetEstimatedCompressionRatioAtLevel(int level) const;
// re-initializes the index that is used to offset into
// files_by_compaction_pri_
// to find the next compaction candidate file.

View File

@ -338,6 +338,13 @@ class DB {
// level number (e.g., "0").
static const std::string kNumFilesAtLevelPrefix;
// "rocksdb.compression-ratio-at-level<N>" - returns string containing the
// compression ratio of data at level <N>, where <N> is an ASCII
// representation of a level number (e.g., "0"). Here, compression
// ratio is defined as uncompressed data size / compressed file size.
// Returns "-1.0" if no open files at level <N>.
static const std::string kCompressionRatioAtLevelPrefix;
// "rocksdb.stats" - returns a multi-line string containing the data
// described by kCFStats followed by the data described by kDBStats.
static const std::string kStats;