Added min/max/avg data block size output to sst_dump

Summary: Added min/max/avg data block size output to sst_dump. Output was added to the end of BlockBasedTable::DumpDataBlocks, so it appears after the data block details, at the very end of the dump file.

Test Plan:
```
./db_bench --benchmarks=fillrandom
./sst_dump --file=/tmp/rocksdbtest-xyz/dbbench/000007.sst --command=raw
tail -n 6 /tmp/rocksdbtest-xyz/dbbench/000007_dump.txt
```
```
Data Block Summary:
--------------------------------------
  # data blocks: 11336
  min data block size: 903
  max data block size: 2268
  avg data block size: 2245.363356
```
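The summary shown above is computed in a single pass while DumpDataBlocks walks the index: each BlockHandle read from the index iterator contributes its size() to a running min/max/sum, and the average is the sum divided by the block count. Below is a minimal standalone sketch of that accumulation, using a hypothetical hard-coded list of block sizes in place of a real index iterator:

```
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

int main() {
  // Hypothetical per-block sizes; in DumpDataBlocks these come from
  // BlockHandle::size() while iterating the index block entries.
  std::vector<uint64_t> block_sizes = {903, 2260, 2268, 2245};

  uint64_t size_min = std::numeric_limits<uint64_t>::max();
  uint64_t size_max = 0;
  uint64_t size_sum = 0;
  for (uint64_t s : block_sizes) {
    size_min = std::min(size_min, s);
    size_max = std::max(size_max, s);
    size_sum += s;
  }

  // Only print a summary if at least one block was seen, mirroring the
  // guard in the patch (and avoiding a division by zero).
  if (!block_sizes.empty()) {
    double size_avg = static_cast<double>(size_sum) / block_sizes.size();
    std::printf("Data Block Summary:\n");
    std::printf("--------------------------------------\n");
    std::printf("  # data blocks: %zu\n", block_sizes.size());
    std::printf("  min data block size: %llu\n",
                static_cast<unsigned long long>(size_min));
    std::printf("  max data block size: %llu\n",
                static_cast<unsigned long long>(size_max));
    std::printf("  avg data block size: %f\n", size_avg);
  }
  return 0;
}
```

As a side benefit, the avg line lets you recover the approximate total data-block footprint of the file: in the sample run above, 11336 blocks × ~2245.36 bytes ≈ 25.5 MB.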

Reviewers: IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D61815
Philipp Unterbrunner 2016-08-12 16:34:11 -07:00
parent e408e98c8c
commit deda159b55

table/block_based_table_reader.cc

```
@@ -8,6 +8,8 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 #include "table/block_based_table_reader.h"
+#include <algorithm>
+#include <limits>
 #include <string>
 #include <utility>
 #include <vector>
@@ -228,7 +230,7 @@ class HashIndexReader : public IndexReader {
       const SliceTransform* hash_key_extractor, const Footer& footer,
       RandomAccessFileReader* file, const ImmutableCFOptions &ioptions,
       const Comparator* comparator, const BlockHandle& index_handle,
       InternalIterator* meta_index_iter, IndexReader** index_reader,
       bool hash_index_allow_collision,
       const PersistentCacheOptions& cache_options) {
     std::unique_ptr<Block> index_block;
@@ -1539,7 +1541,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
                                  handle, cache_key_storage);
   Slice ckey;
   s = GetDataBlockFromCache(cache_key, ckey, block_cache, nullptr,
                             rep_->ioptions, options, &block,
                             rep_->table_options.format_version,
                             rep_->compression_dict_block
@@ -1588,7 +1590,7 @@ Status BlockBasedTable::CreateIndexReader(
   switch (index_type_on_file) {
     case BlockBasedTableOptions::kBinarySearch: {
       return BinarySearchIndexReader::Create(
           file, footer, footer.index_handle(), rep_->ioptions, comparator,
           index_reader, rep_->persistent_cache_options);
     }
     case BlockBasedTableOptions::kHashSearch: {
@@ -1891,6 +1893,10 @@ Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) {
     return s;
   }
+  uint64_t datablock_size_min = std::numeric_limits<uint64_t>::max();
+  uint64_t datablock_size_max = 0;
+  uint64_t datablock_size_sum = 0;
   size_t block_id = 1;
   for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid();
        block_id++, blockhandles_iter->Next()) {
@@ -1899,6 +1905,14 @@ Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) {
       break;
     }
+    Slice bh_val = blockhandles_iter->value();
+    BlockHandle bh;
+    bh.DecodeFrom(&bh_val);
+    uint64_t datablock_size = bh.size();
+    datablock_size_min = std::min(datablock_size_min, datablock_size);
+    datablock_size_max = std::max(datablock_size_max, datablock_size);
+    datablock_size_sum += datablock_size;
     out_file->Append("Data Block # ");
     out_file->Append(rocksdb::ToString(block_id));
     out_file->Append(" @ ");
@@ -1956,6 +1970,24 @@ Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) {
     }
     out_file->Append("\n");
   }
+  uint64_t num_datablocks = block_id - 1;
+  if (num_datablocks) {
+    double datablock_size_avg =
+        static_cast<double>(datablock_size_sum) / num_datablocks;
+    out_file->Append("Data Block Summary:\n");
+    out_file->Append("--------------------------------------");
+    out_file->Append("\n  # data blocks: ");
+    out_file->Append(rocksdb::ToString(num_datablocks));
+    out_file->Append("\n  min data block size: ");
+    out_file->Append(rocksdb::ToString(datablock_size_min));
+    out_file->Append("\n  max data block size: ");
+    out_file->Append(rocksdb::ToString(datablock_size_max));
+    out_file->Append("\n  avg data block size: ");
+    out_file->Append(rocksdb::ToString(datablock_size_avg));
+    out_file->Append("\n");
+  }
   return Status::OK();
 }
```
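One detail worth noting in the last hunk: block_id starts at 1, so num_datablocks = block_id - 1 counts the data blocks actually visited. The summary block is therefore only emitted when at least one data block was read, which also keeps the datablock_size_avg computation from dividing by zero on an empty or unreadable table.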