From fdc0675f5c59c6e43ccb78f6ed06cb5e72cf52d9 Mon Sep 17 00:00:00 2001 From: John Alexander Date: Tue, 19 Jul 2016 09:44:03 -0700 Subject: [PATCH] New Statistics to track Compression/Decompression (#1197) * Added new statistics and refactored to allow ioptions to be passed around as required to access environment and statistics pointers (and, as a convenient side effect, info_log pointer). * Prevent incrementing compression counter when compression is turned off in options. * Prevent incrementing compression counter when compression is turned off in options. * Added two more supported compression types to test code in db_test.cc * Prevent incrementing compression counter when compression is turned off in options. * Added new StatsLevel that excludes compression timing. * Fixed casting error in coding.h * Fixed CompressionStatsTest for new StatsLevel. * Removed unused variable that was breaking the Linux build --- .gitignore | 2 + db/db_test.cc | 75 ++++++++++++++++++ db/plain_table_db_test.cc | 7 +- db/table_properties_collector_test.cc | 4 +- db/version_set.cc | 3 +- include/rocksdb/statistics.h | 22 ++++++ table/block_based_table_builder.cc | 17 +++- table/block_based_table_reader.cc | 110 +++++++++++++------------- table/block_based_table_reader.h | 6 +- table/cuckoo_table_builder_test.cc | 6 +- table/cuckoo_table_reader.cc | 2 +- table/format.cc | 41 +++++++--- table/format.h | 14 ++-- table/meta_blocks.cc | 30 ++++--- table/meta_blocks.h | 13 +-- table/plain_table_reader.cc | 6 +- table/table_test.cc | 2 +- tools/sst_dump_tool.cc | 3 +- util/coding.h | 6 +- 19 files changed, 255 insertions(+), 114 deletions(-) diff --git a/.gitignore b/.gitignore index 62a8bf1bc..ec429a96f 100644 --- a/.gitignore +++ b/.gitignore @@ -64,3 +64,5 @@ java/javadoc scan_build_report/ t LOG + +db_logs/ diff --git a/db/db_test.cc b/db/db_test.cc index 1d2fdd851..719d84fcb 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1189,6 +1189,12 @@ bool MinLevelToCompress(CompressionType& type, Options& options, int wbits, } else if (LZ4_Supported()) { type = kLZ4Compression; fprintf(stderr, "using lz4\n"); + } else if (XPRESS_Supported()) { + type = kXpressCompression; + fprintf(stderr, "using xpress\n"); + } else if (ZSTD_Supported()) { + type = kZSTDNotFinalCompression; + fprintf(stderr, "using ZSTD\n"); } else { fprintf(stderr, "skipping test, compression disabled\n"); return false; @@ -4685,6 +4691,75 @@ TEST_F(DBTest, EncodeDecompressedBlockSizeTest) { } } +TEST_F(DBTest, CompressionStatsTest) { + CompressionType type; + + if (Snappy_Supported()) { + type = kSnappyCompression; + fprintf(stderr, "using snappy\n"); + } else if (Zlib_Supported()) { + type = kZlibCompression; + fprintf(stderr, "using zlib\n"); + } else if (BZip2_Supported()) { + type = kBZip2Compression; + fprintf(stderr, "using bzip2\n"); + } else if (LZ4_Supported()) { + type = kLZ4Compression; + fprintf(stderr, "using lz4\n"); + } else if (XPRESS_Supported()) { + type = kXpressCompression; + fprintf(stderr, "using xpress\n"); + } else if (ZSTD_Supported()) { + type = kZSTDNotFinalCompression; + fprintf(stderr, "using ZSTD\n"); + } else { + fprintf(stderr, "skipping test, compression disabled\n"); + return; + } + + Options options = CurrentOptions(); + options.compression = type; + options.statistics = rocksdb::CreateDBStatistics(); + options.statistics->stats_level_ = StatsLevel::kAll; + DestroyAndReopen(options); + + int kNumKeysWritten = 100000; + + // Check that compressions occur and are counted when compression is turned on + Random rnd(301); + for (int i = 0; i < kNumKeysWritten; ++i) { + // compressible string + ASSERT_OK(Put(Key(i), RandomString(&rnd, 128) + std::string(128, 'a'))); + } + ASSERT_GT(options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED), 0); + + for (int i = 0; i < kNumKeysWritten; ++i) { + auto r = Get(Key(i)); + } + ASSERT_GT(options.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED), 0); + + options.compression = kNoCompression; + DestroyAndReopen(options); + uint64_t currentCompressions = + options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED); + uint64_t currentDecompressions = + options.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED); + + // Check that compressions do not occur when turned off + for (int i = 0; i < kNumKeysWritten; ++i) { + // compressible string + ASSERT_OK(Put(Key(i), RandomString(&rnd, 128) + std::string(128, 'a'))); + } + ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED) + - currentCompressions, 0); + + for (int i = 0; i < kNumKeysWritten; ++i) { + auto r = Get(Key(i)); + } + ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_DECOMPRESSED) + - currentDecompressions, 0); +} + TEST_F(DBTest, MutexWaitStatsDisabledByDefault) { Options options = CurrentOptions(); options.create_if_missing = true; diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index a268fc627..9d3803f9e 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -331,20 +331,19 @@ class TestPlainTableFactory : public PlainTableFactory { TableProperties* props = nullptr; auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, - table_reader_options.ioptions.env, - table_reader_options.ioptions.info_log, &props); + table_reader_options.ioptions, &props); EXPECT_TRUE(s.ok()); if (store_index_in_file_) { BlockHandle bloom_block_handle; s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber, - table_reader_options.ioptions.env, + table_reader_options.ioptions, BloomBlockBuilder::kBloomBlock, &bloom_block_handle); EXPECT_TRUE(s.ok()); BlockHandle index_block_handle; s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber, - table_reader_options.ioptions.env, + table_reader_options.ioptions, PlainTableIndexBuilder::kPlainTableIndexBlock, &index_block_handle); EXPECT_TRUE(s.ok()); diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index 6192ca08d..203317267 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -276,7 +276,7 @@ void TestCustomizedTablePropertiesCollector( new test::StringSource(fwf->contents()))); TableProperties* props; Status s = ReadTableProperties(fake_file_reader.get(), fwf->contents().size(), - magic_number, Env::Default(), nullptr, &props); + magic_number, ioptions, &props); std::unique_ptr props_guard(props); ASSERT_OK(s); @@ -417,7 +417,7 @@ void TestInternalKeyPropertiesCollector( TableProperties* props; Status s = ReadTableProperties(reader.get(), fwf->contents().size(), magic_number, - Env::Default(), nullptr, &props); + ioptions, &props); ASSERT_OK(s); std::unique_ptr props_guard(props); diff --git a/db/version_set.cc b/db/version_set.cc index 6ded4fdb5..8a858ddaf 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -597,8 +597,7 @@ Status Version::GetTableProperties(std::shared_ptr* tp, new RandomAccessFileReader(std::move(file))); s = ReadTableProperties( file_reader.get(), file_meta->fd.GetFileSize(), - Footer::kInvalidTableMagicNumber /* table's magic number */, vset_->env_, - ioptions->info_log, &raw_table_properties); + Footer::kInvalidTableMagicNumber /* table's magic number */, *ioptions, &raw_table_properties); if (!s.ok()) { return s; } diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 725e1fd82..16fba0906 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -179,6 +179,11 @@ enum Tickers : uint32_t { NUMBER_SUPERVERSION_ACQUIRES, NUMBER_SUPERVERSION_RELEASES, NUMBER_SUPERVERSION_CLEANUPS, + + // # of compressions/decompressions executed + NUMBER_BLOCK_COMPRESSED, + NUMBER_BLOCK_DECOMPRESSED, + NUMBER_BLOCK_NOT_COMPRESSED, MERGE_OPERATION_TOTAL_TIME, FILTER_OPERATION_TOTAL_TIME, @@ -272,6 +277,8 @@ const std::vector> TickersNameMap = { {NUMBER_SUPERVERSION_ACQUIRES, "rocksdb.number.superversion_acquires"}, {NUMBER_SUPERVERSION_RELEASES, "rocksdb.number.superversion_releases"}, {NUMBER_SUPERVERSION_CLEANUPS, "rocksdb.number.superversion_cleanups"}, + {NUMBER_BLOCK_COMPRESSED, "rocksdb.number.block.compressed"}, + {NUMBER_BLOCK_DECOMPRESSED, "rocksdb.number.block.decompressed"}, {NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"}, {MERGE_OPERATION_TOTAL_TIME, "rocksdb.merge.operation.time.nanos"}, {FILTER_OPERATION_TOTAL_TIME, "rocksdb.filter.operation.time.nanos"}, @@ -316,6 +323,14 @@ enum Histograms : uint32_t { BYTES_PER_READ, BYTES_PER_WRITE, BYTES_PER_MULTIGET, + + // number of bytes compressed/decompressed + // number of bytes is when uncompressed; i.e. before/after respectively + BYTES_COMPRESSED, + BYTES_DECOMPRESSED, + COMPRESSION_TIMES_NANOS, + DECOMPRESSION_TIMES_NANOS, + HISTOGRAM_ENUM_MAX, // TODO(ldemailly): enforce HistogramsNameMap match }; @@ -346,6 +361,10 @@ const std::vector> HistogramsNameMap = { {BYTES_PER_READ, "rocksdb.bytes.per.read"}, {BYTES_PER_WRITE, "rocksdb.bytes.per.write"}, {BYTES_PER_MULTIGET, "rocksdb.bytes.per.multiget"}, + {BYTES_COMPRESSED, "rocksdb.bytes.compressed"}, + {BYTES_DECOMPRESSED, "rocksdb.bytes.decompressed"}, + {COMPRESSION_TIMES_NANOS, "rocksdb.compression.times.nanos"}, + {DECOMPRESSION_TIMES_NANOS, "rocksdb.decompression.times.nanos"}, }; struct HistogramData { @@ -360,6 +379,9 @@ enum StatsLevel { // Collect all stats except the counters requiring to get time inside the // mutex lock. kExceptTimeForMutex, + // Collect all stats expect time inside mutex lock AND time spent on + // compression + kExceptDetailedTimers, // Collect all stats, including measuring duration of mutex operations. // If getting time is expensive on the platform to run, it can // reduce scalability to more threads, especially for writes. diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 627141149..21720bdb7 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -651,11 +651,16 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, auto type = r->compression_type; Slice block_contents; bool abort_compression = false; + + StopWatchNano timer(r->ioptions.env, + ShouldReportDetailedTime(r->ioptions.env, r->ioptions.statistics)); + if (raw_block_contents.size() < kCompressionSizeLimit) { Slice compression_dict; if (is_data_block && r->compression_dict && r->compression_dict->size()) { compression_dict = *r->compression_dict; } + block_contents = CompressBlock(raw_block_contents, r->compression_opts, &type, r->table_options.format_version, compression_dict, &r->compressed_output); @@ -668,7 +673,8 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, BlockContents contents; Status stat = UncompressBlockContentsForCompressionType( block_contents.data(), block_contents.size(), &contents, - r->table_options.format_version, compression_dict, type); + r->table_options.format_version, compression_dict, type, + r->ioptions); if (stat.ok()) { bool compressed_ok = contents.data.compare(raw_block_contents) == 0; @@ -698,6 +704,15 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, type = kNoCompression; block_contents = raw_block_contents; } + else if (type != kNoCompression && + ShouldReportDetailedTime(r->ioptions.env, + r->ioptions.statistics)) { + MeasureTime(r->ioptions.statistics, COMPRESSION_TIMES_NANOS, + timer.ElapsedNanos()); + MeasureTime(r->ioptions.statistics, BYTES_COMPRESSED, + raw_block_contents.size()); + RecordTick(r->ioptions.statistics, NUMBER_BLOCK_COMPRESSED); + } WriteRawBlock(block_contents, type, handle); r->compressed_output.clear(); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index efc619222..1f95755ad 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -60,14 +60,13 @@ namespace { // dictionary. Status ReadBlockFromFile(RandomAccessFileReader* file, const Footer& footer, const ReadOptions& options, const BlockHandle& handle, - std::unique_ptr* result, Env* env, + std::unique_ptr* result, + const ImmutableCFOptions &ioptions, bool do_uncompress, const Slice& compression_dict, - const PersistentCacheOptions& cache_options, - Logger* info_log) { + const PersistentCacheOptions& cache_options) { BlockContents contents; - Status s = ReadBlockContents(file, footer, options, handle, &contents, env, - do_uncompress, compression_dict, cache_options, - info_log); + Status s = ReadBlockContents(file, footer, options, handle, &contents, ioptions, + do_uncompress, compression_dict, cache_options); if (s.ok()) { result->reset(new Block(std::move(contents))); } @@ -177,19 +176,18 @@ class BinarySearchIndexReader : public IndexReader { // On success, index_reader will be populated; otherwise it will remain // unmodified. static Status Create(RandomAccessFileReader* file, const Footer& footer, - const BlockHandle& index_handle, Env* env, + const BlockHandle& index_handle, + const ImmutableCFOptions &ioptions, const Comparator* comparator, IndexReader** index_reader, - const PersistentCacheOptions& cache_options, - Statistics* statistics) { + const PersistentCacheOptions& cache_options) { std::unique_ptr index_block; auto s = ReadBlockFromFile(file, footer, ReadOptions(), index_handle, - &index_block, env, true /* decompress */, - Slice() /*compression dict*/, cache_options, - /*info_log*/ nullptr); + &index_block, ioptions, true /* decompress */, + Slice() /*compression dict*/, cache_options); if (s.ok()) { *index_reader = new BinarySearchIndexReader( - comparator, std::move(index_block), statistics); + comparator, std::move(index_block), ioptions.statistics); } return s; @@ -226,15 +224,15 @@ class HashIndexReader : public IndexReader { public: static Status Create( const SliceTransform* hash_key_extractor, const Footer& footer, - RandomAccessFileReader* file, Env* env, const Comparator* comparator, - const BlockHandle& index_handle, InternalIterator* meta_index_iter, - IndexReader** index_reader, bool hash_index_allow_collision, - const PersistentCacheOptions& cache_options, Statistics* statistics) { + RandomAccessFileReader* file, const ImmutableCFOptions &ioptions, + const Comparator* comparator, const BlockHandle& index_handle, + InternalIterator* meta_index_iter, IndexReader** index_reader, + bool hash_index_allow_collision, + const PersistentCacheOptions& cache_options) { std::unique_ptr index_block; auto s = ReadBlockFromFile(file, footer, ReadOptions(), index_handle, - &index_block, env, true /* decompress */, - Slice() /*compression dict*/, cache_options, - /*info_log*/ nullptr); + &index_block, ioptions, true /* decompress */, + Slice() /*compression dict*/, cache_options); if (!s.ok()) { return s; @@ -245,7 +243,8 @@ class HashIndexReader : public IndexReader { // So, Create will succeed regardless, from this point on. auto new_index_reader = - new HashIndexReader(comparator, std::move(index_block), statistics); + new HashIndexReader(comparator, std::move(index_block), + ioptions.statistics); *index_reader = new_index_reader; // Get prefixes block @@ -269,14 +268,14 @@ class HashIndexReader : public IndexReader { // Read contents for the blocks BlockContents prefixes_contents; s = ReadBlockContents(file, footer, ReadOptions(), prefixes_handle, - &prefixes_contents, env, true /* decompress */, + &prefixes_contents, ioptions, true /* decompress */, Slice() /*compression dict*/, cache_options); if (!s.ok()) { return s; } BlockContents prefixes_meta_contents; s = ReadBlockContents(file, footer, ReadOptions(), prefixes_meta_handle, - &prefixes_meta_contents, env, true /* decompress */, + &prefixes_meta_contents, ioptions, true /* decompress */, Slice() /*compression dict*/, cache_options); if (!s.ok()) { // TODO: log error @@ -547,7 +546,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, PersistentCacheOptions(rep->table_options.persistent_cache, std::string(rep->persistent_cache_key_prefix, rep->persistent_cache_key_prefix_size), - rep->ioptions.statistics); + rep->ioptions.statistics); // Read meta index std::unique_ptr meta; @@ -585,8 +584,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, TableProperties* table_properties = nullptr; if (s.ok()) { s = ReadProperties(meta_iter->value(), rep->file.get(), rep->footer, - rep->ioptions.env, rep->ioptions.info_log, - &table_properties); + rep->ioptions, &table_properties); } if (!s.ok()) { @@ -613,7 +611,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, // true. unique_ptr compression_dict_block{new BlockContents()}; s = rocksdb::ReadMetaBlock(rep->file.get(), file_size, - kBlockBasedTableMagicNumber, rep->ioptions.env, + kBlockBasedTableMagicNumber, rep->ioptions, rocksdb::kCompressionDictBlock, compression_dict_block.get()); if (!s.ok()) { @@ -745,9 +743,9 @@ Status BlockBasedTable::ReadMetaBlock(Rep* rep, std::unique_ptr meta; Status s = ReadBlockFromFile( rep->file.get(), rep->footer, ReadOptions(), - rep->footer.metaindex_handle(), &meta, rep->ioptions.env, + rep->footer.metaindex_handle(), &meta, rep->ioptions, true /* decompress */, Slice() /*compression dict*/, - rep->persistent_cache_options, rep->ioptions.info_log); + rep->persistent_cache_options); if (!s.ok()) { Log(InfoLogLevel::ERROR_LEVEL, rep->ioptions.info_log, @@ -764,13 +762,14 @@ Status BlockBasedTable::ReadMetaBlock(Rep* rep, Status BlockBasedTable::GetDataBlockFromCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, - Cache* block_cache, Cache* block_cache_compressed, Statistics* statistics, - const ReadOptions& read_options, + Cache* block_cache, Cache* block_cache_compressed, + const ImmutableCFOptions &ioptions, const ReadOptions& read_options, BlockBasedTable::CachableEntry* block, uint32_t format_version, const Slice& compression_dict) { Status s; Block* compressed_block = nullptr; Cache::Handle* block_cache_compressed_handle = nullptr; + Statistics* statistics = ioptions.statistics; // Lookup uncompressed cache first if (block_cache != nullptr) { @@ -811,7 +810,8 @@ Status BlockBasedTable::GetDataBlockFromCache( BlockContents contents; s = UncompressBlockContents(compressed_block->data(), compressed_block->size(), &contents, - format_version, compression_dict); + format_version, compression_dict, + ioptions); // Insert uncompressed block into block cache if (s.ok()) { @@ -840,7 +840,7 @@ Status BlockBasedTable::GetDataBlockFromCache( Status BlockBasedTable::PutDataBlockToCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, - const ReadOptions& read_options, Statistics* statistics, + const ReadOptions& read_options, const ImmutableCFOptions &ioptions, CachableEntry* block, Block* raw_block, uint32_t format_version, const Slice& compression_dict) { assert(raw_block->compression_type() == kNoCompression || @@ -849,9 +849,10 @@ Status BlockBasedTable::PutDataBlockToCache( Status s; // Retrieve the uncompressed contents into a new buffer BlockContents contents; + Statistics* statistics = ioptions.statistics; if (raw_block->compression_type() != kNoCompression) { s = UncompressBlockContents(raw_block->data(), raw_block->size(), &contents, - format_version, compression_dict); + format_version, compression_dict, ioptions); } if (!s.ok()) { delete raw_block; @@ -913,7 +914,7 @@ FilterBlockReader* BlockBasedTable::ReadFilter(Rep* rep) { } BlockContents block; if (!ReadBlockContents(rep->file.get(), rep->footer, ReadOptions(), - rep->filter_handle, &block, rep->ioptions.env, + rep->filter_handle, &block, rep->ioptions, false /* decompress */, Slice() /*compression dict*/, rep->persistent_cache_options) .ok()) { @@ -1148,7 +1149,7 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator( } s = GetDataBlockFromCache( - key, ckey, block_cache, block_cache_compressed, statistics, ro, &block, + key, ckey, block_cache, block_cache_compressed, rep->ioptions, ro, &block, rep->table_options.format_version, compression_dict); if (block.value == nullptr && !no_io && ro.fill_cache) { @@ -1156,15 +1157,14 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator( { StopWatch sw(rep->ioptions.env, statistics, READ_BLOCK_GET_MICROS); s = ReadBlockFromFile(rep->file.get(), rep->footer, ro, handle, - &raw_block, rep->ioptions.env, + &raw_block, rep->ioptions, block_cache_compressed == nullptr, - compression_dict, rep->persistent_cache_options, - rep->ioptions.info_log); + compression_dict, rep->persistent_cache_options); } if (s.ok()) { s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed, - ro, statistics, &block, raw_block.release(), + ro, rep->ioptions, &block, raw_block.release(), rep->table_options.format_version, compression_dict); } @@ -1184,9 +1184,8 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator( } std::unique_ptr block_value; s = ReadBlockFromFile(rep->file.get(), rep->footer, ro, handle, - &block_value, rep->ioptions.env, true /* compress */, - compression_dict, rep->persistent_cache_options, - rep->ioptions.info_log); + &block_value, rep->ioptions, true /* compress */, + compression_dict, rep->persistent_cache_options); if (s.ok()) { block.value = block_value.release(); } @@ -1510,8 +1509,9 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, handle, cache_key_storage); Slice ckey; - s = GetDataBlockFromCache(cache_key, ckey, block_cache, nullptr, nullptr, - options, &block, rep_->table_options.format_version, + s = GetDataBlockFromCache(cache_key, ckey, block_cache, nullptr, + rep_->ioptions, options, &block, + rep_->table_options.format_version, rep_->compression_dict_block ? rep_->compression_dict_block->data : Slice()); @@ -1544,11 +1544,8 @@ Status BlockBasedTable::CreateIndexReader( } auto file = rep_->file.get(); - auto env = rep_->ioptions.env; auto comparator = &rep_->internal_comparator; const Footer& footer = rep_->footer; - Statistics* stats = rep_->ioptions.statistics; - if (index_type_on_file == BlockBasedTableOptions::kHashSearch && rep_->ioptions.prefix_extractor == nullptr) { Log(InfoLogLevel::WARN_LEVEL, rep_->ioptions.info_log, @@ -1561,8 +1558,8 @@ Status BlockBasedTable::CreateIndexReader( switch (index_type_on_file) { case BlockBasedTableOptions::kBinarySearch: { return BinarySearchIndexReader::Create( - file, footer, footer.index_handle(), env, comparator, index_reader, - rep_->persistent_cache_options, stats); + file, footer, footer.index_handle(), rep_->ioptions, comparator, + index_reader, rep_->persistent_cache_options); } case BlockBasedTableOptions::kHashSearch: { std::unique_ptr meta_guard; @@ -1577,8 +1574,8 @@ Status BlockBasedTable::CreateIndexReader( "Unable to read the metaindex block." " Fall back to binary search index."); return BinarySearchIndexReader::Create( - file, footer, footer.index_handle(), env, comparator, - index_reader, rep_->persistent_cache_options, stats); + file, footer, footer.index_handle(), rep_->ioptions, comparator, + index_reader, rep_->persistent_cache_options); } meta_index_iter = meta_iter_guard.get(); } @@ -1588,10 +1585,9 @@ Status BlockBasedTable::CreateIndexReader( rep_->internal_prefix_transform.reset( new InternalKeySliceTransform(rep_->ioptions.prefix_extractor)); return HashIndexReader::Create( - rep_->internal_prefix_transform.get(), footer, file, env, comparator, - footer.index_handle(), meta_index_iter, index_reader, - rep_->hash_index_allow_collision, rep_->persistent_cache_options, - stats); + rep_->internal_prefix_transform.get(), footer, file, rep_->ioptions, + comparator, footer.index_handle(), meta_index_iter, index_reader, + rep_->hash_index_allow_collision, rep_->persistent_cache_options); } default: { std::string error_message = @@ -1711,7 +1707,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) { BlockContents block; if (ReadBlockContents( rep_->file.get(), rep_->footer, ReadOptions(), handle, &block, - rep_->ioptions.env, false /*decompress*/, + rep_->ioptions, false /*decompress*/, Slice() /*compression dict*/, rep_->persistent_cache_options) .ok()) { rep_->filter.reset(new BlockBasedFilterBlockReader( diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 28447687b..84a990a6f 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -177,8 +177,8 @@ class BlockBasedTable : public TableReader { // dictionary. static Status GetDataBlockFromCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, - Cache* block_cache, Cache* block_cache_compressed, Statistics* statistics, - const ReadOptions& read_options, + Cache* block_cache, Cache* block_cache_compressed, + const ImmutableCFOptions &ioptions, const ReadOptions& read_options, BlockBasedTable::CachableEntry* block, uint32_t format_version, const Slice& compression_dict); @@ -195,7 +195,7 @@ class BlockBasedTable : public TableReader { static Status PutDataBlockToCache( const Slice& block_cache_key, const Slice& compressed_block_cache_key, Cache* block_cache, Cache* block_cache_compressed, - const ReadOptions& read_options, Statistics* statistics, + const ReadOptions& read_options, const ImmutableCFOptions &ioptions, CachableEntry* block, Block* raw_block, uint32_t format_version, const Slice& compression_dict); diff --git a/table/cuckoo_table_builder_test.cc b/table/cuckoo_table_builder_test.cc index 2fda8044e..ac5bed157 100644 --- a/table/cuckoo_table_builder_test.cc +++ b/table/cuckoo_table_builder_test.cc @@ -49,12 +49,16 @@ class CuckooBuilderTest : public testing::Test { uint64_t read_file_size; ASSERT_OK(env_->GetFileSize(fname, &read_file_size)); + Options options; + options.allow_mmap_reads = true; + ImmutableCFOptions ioptions(options); + // Assert Table Properties. TableProperties* props = nullptr; unique_ptr file_reader( new RandomAccessFileReader(std::move(read_file))); ASSERT_OK(ReadTableProperties(file_reader.get(), read_file_size, - kCuckooTableMagicNumber, env_, nullptr, + kCuckooTableMagicNumber, ioptions, &props)); // Check unused bucket. std::string unused_key = props->user_collected_properties[ diff --git a/table/cuckoo_table_reader.cc b/table/cuckoo_table_reader.cc index fb1aef2e8..f6d69154e 100644 --- a/table/cuckoo_table_reader.cc +++ b/table/cuckoo_table_reader.cc @@ -45,7 +45,7 @@ CuckooTableReader::CuckooTableReader( } TableProperties* props = nullptr; status_ = ReadTableProperties(file_.get(), file_size, kCuckooTableMagicNumber, - ioptions.env, ioptions.info_log, &props); + ioptions, &props); if (!status_.ok()) { return; } diff --git a/table/format.cc b/table/format.cc index be86ef8f8..3675bbade 100644 --- a/table/format.cc +++ b/table/format.cc @@ -23,6 +23,9 @@ #include "util/perf_context_imp.h" #include "util/string_util.h" #include "util/xxhash.h" +#include "util/statistics.h" +#include "util/stop_watch.h" + namespace rocksdb { @@ -39,6 +42,11 @@ const uint64_t kPlainTableMagicNumber = 0; #endif const uint32_t DefaultStackBufferSize = 5000; +bool ShouldReportDetailedTime(Env* env, Statistics* stats) { + return env != nullptr && stats != nullptr && + stats->stats_level_ > kExceptDetailedTimers; +} + void BlockHandle::EncodeTo(std::string* dst) const { // Sanity check that all fields have been set assert(offset_ != ~static_cast(0)); @@ -297,10 +305,10 @@ Status ReadBlock(RandomAccessFileReader* file, const Footer& footer, Status ReadBlockContents(RandomAccessFileReader* file, const Footer& footer, const ReadOptions& read_options, const BlockHandle& handle, BlockContents* contents, - Env* env, bool decompression_requested, + const ImmutableCFOptions &ioptions, + bool decompression_requested, const Slice& compression_dict, - const PersistentCacheOptions& cache_options, - Logger* info_log) { + const PersistentCacheOptions& cache_options) { Status status; Slice slice; size_t n = static_cast(handle.size()); @@ -318,9 +326,9 @@ Status ReadBlockContents(RandomAccessFileReader* file, const Footer& footer, return status; } else { // uncompressed page is not found - if (info_log && !status.IsNotFound()) { + if (ioptions.info_log && !status.IsNotFound()) { assert(!status.ok()); - Log(InfoLogLevel::INFO_LEVEL, info_log, + Log(InfoLogLevel::INFO_LEVEL, ioptions.info_log, "Error reading from persistent cache. %s", status.ToString().c_str()); } @@ -341,9 +349,9 @@ Status ReadBlockContents(RandomAccessFileReader* file, const Footer& footer, used_buf = heap_buf.get(); slice = Slice(heap_buf.get(), n); } else { - if (info_log && !status.IsNotFound()) { + if (ioptions.info_log && !status.IsNotFound()) { assert(!status.ok()); - Log(InfoLogLevel::INFO_LEVEL, info_log, + Log(InfoLogLevel::INFO_LEVEL, ioptions.info_log, "Error reading from persistent cache. %s", status.ToString().c_str()); } // cache miss read from device @@ -378,7 +386,8 @@ Status ReadBlockContents(RandomAccessFileReader* file, const Footer& footer, if (decompression_requested && compression_type != kNoCompression) { // compressed page, uncompress, update cache status = UncompressBlockContents(slice.data(), n, contents, - footer.version(), compression_dict); + footer.version(), compression_dict, + ioptions); } else if (slice.data() != used_buf) { // the slice content is not the buffer provided *contents = BlockContents(Slice(slice.data(), n), false, compression_type); @@ -405,11 +414,13 @@ Status ReadBlockContents(RandomAccessFileReader* file, const Footer& footer, Status UncompressBlockContentsForCompressionType( const char* data, size_t n, BlockContents* contents, uint32_t format_version, const Slice& compression_dict, - CompressionType compression_type) { + CompressionType compression_type, const ImmutableCFOptions &ioptions) { std::unique_ptr ubuf; assert(compression_type != kNoCompression && "Invalid compression type"); + StopWatchNano timer(ioptions.env, + ShouldReportDetailedTime(ioptions.env, ioptions.statistics)); int decompress_size = 0; switch (compression_type) { case kSnappyCompression: { @@ -501,6 +512,13 @@ Status UncompressBlockContentsForCompressionType( return Status::Corruption("bad block type"); } + if(ShouldReportDetailedTime(ioptions.env, ioptions.statistics)){ + MeasureTime(ioptions.statistics, DECOMPRESSION_TIMES_NANOS, + timer.ElapsedNanos()); + MeasureTime(ioptions.statistics, BYTES_DECOMPRESSED, contents->data.size()); + RecordTick(ioptions.statistics, NUMBER_BLOCK_DECOMPRESSED); + } + return Status::OK(); } @@ -513,11 +531,12 @@ Status UncompressBlockContentsForCompressionType( // format_version is the block format as defined in include/rocksdb/table.h Status UncompressBlockContents(const char* data, size_t n, BlockContents* contents, uint32_t format_version, - const Slice& compression_dict) { + const Slice& compression_dict, + const ImmutableCFOptions &ioptions) { assert(data[n] != kNoCompression); return UncompressBlockContentsForCompressionType( data, n, contents, format_version, compression_dict, - (CompressionType)data[n]); + (CompressionType)data[n], ioptions); } } // namespace rocksdb diff --git a/table/format.h b/table/format.h index 2ca7d2520..571659d59 100644 --- a/table/format.h +++ b/table/format.h @@ -24,6 +24,8 @@ class Block; class RandomAccessFile; struct ReadOptions; +extern bool ShouldReportDetailedTime(Env* env, Statistics* stats); + // the length of the magic number in bytes. const int kMagicNumberLengthByte = 8; @@ -212,10 +214,9 @@ struct BlockContents { extern Status ReadBlockContents( RandomAccessFileReader* file, const Footer& footer, const ReadOptions& options, const BlockHandle& handle, - BlockContents* contents, Env* env, bool do_uncompress = true, - const Slice& compression_dict = Slice(), - const PersistentCacheOptions& cache_options = PersistentCacheOptions(), - Logger* info_log = nullptr); + BlockContents* contents, const ImmutableCFOptions &ioptions, + bool do_uncompress = true, const Slice& compression_dict = Slice(), + const PersistentCacheOptions& cache_options = PersistentCacheOptions()); // The 'data' points to the raw block contents read in from file. // This method allocates a new heap buffer and the raw block @@ -227,7 +228,8 @@ extern Status ReadBlockContents( extern Status UncompressBlockContents(const char* data, size_t n, BlockContents* contents, uint32_t compress_format_version, - const Slice& compression_dict); + const Slice& compression_dict, + const ImmutableCFOptions &ioptions); // This is an extension to UncompressBlockContents that accepts // a specific compression type. This is used by un-wrapped blocks @@ -235,7 +237,7 @@ extern Status UncompressBlockContents(const char* data, size_t n, extern Status UncompressBlockContentsForCompressionType( const char* data, size_t n, BlockContents* contents, uint32_t compress_format_version, const Slice& compression_dict, - CompressionType compression_type); + CompressionType compression_type, const ImmutableCFOptions &ioptions); // Implementation details follow. Clients should ignore, diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 2cbe9eca7..e98a638e0 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -150,7 +150,7 @@ bool NotifyCollectTableCollectorsOnFinish( } Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, - const Footer& footer, Env* env, Logger* logger, + const Footer& footer, const ImmutableCFOptions &ioptions, TableProperties** table_properties) { assert(table_properties); @@ -165,7 +165,7 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, read_options.verify_checksums = false; Status s; s = ReadBlockContents(file, footer, read_options, handle, &block_contents, - env, false /* decompress */); + ioptions, false /* decompress */); if (!s.ok()) { return s; @@ -219,7 +219,8 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, auto error_msg = "Detect malformed value in properties meta-block:" "\tkey: " + key + "\tval: " + raw_val.ToString(); - Log(InfoLogLevel::ERROR_LEVEL, logger, "%s", error_msg.c_str()); + Log(InfoLogLevel::ERROR_LEVEL, ioptions.info_log, "%s", + error_msg.c_str()); continue; } *(pos->second) = val; @@ -251,8 +252,9 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, } Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, - uint64_t table_magic_number, Env* env, - Logger* info_log, TableProperties** properties) { + uint64_t table_magic_number, + const ImmutableCFOptions &ioptions, + TableProperties** properties) { // -- Read metaindex block Footer footer; auto s = ReadFooterFromFile(file, file_size, &footer, table_magic_number); @@ -265,7 +267,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, ReadOptions read_options; read_options.verify_checksums = false; s = ReadBlockContents(file, footer, read_options, metaindex_handle, - &metaindex_contents, env, false /* decompress */); + &metaindex_contents, ioptions, false /* decompress */); if (!s.ok()) { return s; } @@ -282,8 +284,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, TableProperties table_properties; if (found_properties_block == true) { - s = ReadProperties(meta_iter->value(), file, footer, env, info_log, - properties); + s = ReadProperties(meta_iter->value(), file, footer, ioptions, properties); } else { s = Status::NotFound(); } @@ -305,7 +306,8 @@ Status FindMetaBlock(InternalIterator* meta_index_iter, } Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, - uint64_t table_magic_number, Env* env, + uint64_t table_magic_number, + const ImmutableCFOptions &ioptions, const std::string& meta_block_name, BlockHandle* block_handle) { Footer footer; @@ -319,7 +321,7 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, ReadOptions read_options; read_options.verify_checksums = false; s = ReadBlockContents(file, footer, read_options, metaindex_handle, - &metaindex_contents, env, false /* do decompression */); + &metaindex_contents, ioptions, false /* do decompression */); if (!s.ok()) { return s; } @@ -332,7 +334,8 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, } Status ReadMetaBlock(RandomAccessFileReader* file, uint64_t file_size, - uint64_t table_magic_number, Env* env, + uint64_t table_magic_number, + const ImmutableCFOptions &ioptions, const std::string& meta_block_name, BlockContents* contents) { Status status; @@ -348,7 +351,8 @@ Status ReadMetaBlock(RandomAccessFileReader* file, uint64_t file_size, ReadOptions read_options; read_options.verify_checksums = false; status = ReadBlockContents(file, footer, read_options, metaindex_handle, - &metaindex_contents, env, false /* decompress */); + &metaindex_contents, ioptions, + false /* decompress */); if (!status.ok()) { return status; } @@ -368,7 +372,7 @@ Status ReadMetaBlock(RandomAccessFileReader* file, uint64_t file_size, // Reading metablock return ReadBlockContents(file, footer, read_options, block_handle, contents, - env, false /* decompress */); + ioptions, false /* decompress */); } } // namespace rocksdb diff --git a/table/meta_blocks.h b/table/meta_blocks.h index ab4f7e127..99084d790 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -94,7 +94,7 @@ bool NotifyCollectTableCollectorsOnFinish( // *table_properties will point to a heap-allocated TableProperties // object, otherwise value of `table_properties` will not be modified. Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, - const Footer& footer, Env* env, Logger* logger, + const Footer& footer, const ImmutableCFOptions &ioptions, TableProperties** table_properties); // Directly read the properties from the properties block of a plain table. @@ -102,8 +102,9 @@ Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, // *table_properties will point to a heap-allocated TableProperties // object, otherwise value of `table_properties` will not be modified. Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, - uint64_t table_magic_number, Env* env, - Logger* info_log, TableProperties** properties); + uint64_t table_magic_number, + const ImmutableCFOptions &ioptions, + TableProperties** properties); // Find the meta block from the meta index block. Status FindMetaBlock(InternalIterator* meta_index_iter, @@ -112,7 +113,8 @@ Status FindMetaBlock(InternalIterator* meta_index_iter, // Find the meta block Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, - uint64_t table_magic_number, Env* env, + uint64_t table_magic_number, + const ImmutableCFOptions &ioptions, const std::string& meta_block_name, BlockHandle* block_handle); @@ -120,7 +122,8 @@ Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, // from `file` and initialize `contents` with contents of this block. // Return Status::OK in case of success. Status ReadMetaBlock(RandomAccessFileReader* file, uint64_t file_size, - uint64_t table_magic_number, Env* env, + uint64_t table_magic_number, + const ImmutableCFOptions &ioptions, const std::string& meta_block_name, BlockContents* contents); diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index a6db428e4..a5155254b 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -128,7 +128,7 @@ Status PlainTableReader::Open(const ImmutableCFOptions& ioptions, TableProperties* props = nullptr; auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, - ioptions.env, ioptions.info_log, &props); + ioptions, &props); if (!s.ok()) { return s; } @@ -293,13 +293,13 @@ Status PlainTableReader::PopulateIndex(TableProperties* props, BlockContents bloom_block_contents; auto s = ReadMetaBlock(file_info_.file.get(), file_size_, - kPlainTableMagicNumber, ioptions_.env, + kPlainTableMagicNumber, ioptions_, BloomBlockBuilder::kBloomBlock, &bloom_block_contents); bool index_in_file = s.ok(); BlockContents index_block_contents; s = ReadMetaBlock( - file_info_.file.get(), file_size_, kPlainTableMagicNumber, ioptions_.env, + file_info_.file.get(), file_size_, kPlainTableMagicNumber, ioptions_, PlainTableIndexBuilder::kPlainTableIndexBlock, &index_block_contents); index_in_file &= s.ok(); diff --git a/table/table_test.cc b/table/table_test.cc index d9cb799b5..b7b23a777 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -2064,7 +2064,7 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) { TableProperties* props = nullptr; auto s = ReadTableProperties(file_reader.get(), ss->contents().size(), - kPlainTableMagicNumber, Env::Default(), nullptr, + kPlainTableMagicNumber, ioptions, &props); std::unique_ptr props_guard(props); ASSERT_OK(s); diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index 55326a283..4b8b92e6b 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -219,8 +219,7 @@ Status SstFileReader::ReadTableProperties(uint64_t table_magic_number, uint64_t file_size) { TableProperties* table_properties = nullptr; Status s = rocksdb::ReadTableProperties(file, file_size, table_magic_number, - options_.env, options_.info_log.get(), - &table_properties); + ioptions_, &table_properties); if (s.ok()) { table_properties_.reset(table_properties); } else { diff --git a/util/coding.h b/util/coding.h index 5bc866438..cff896d36 100644 --- a/util/coding.h +++ b/util/coding.h @@ -154,7 +154,8 @@ inline void EncodeFixed64(char* buf, uint64_t value) { // Pull the last 8 bits and cast it to a character inline void PutFixed32(std::string* dst, uint32_t value) { #if __BYTE_ORDER__ == __LITTLE_ENDIAN__ - dst->append(static_cast(&value), sizeof(value)); + dst->append(const_cast(reinterpret_cast(&value)), + sizeof(value)); #else char buf[sizeof(value)]; EncodeFixed32(buf, value); @@ -164,7 +165,8 @@ inline void PutFixed32(std::string* dst, uint32_t value) { inline void PutFixed64(std::string* dst, uint64_t value) { #if __BYTE_ORDER__ == __LITTLE_ENDIAN__ - dst->append(static_const(&value), sizeof(value)); + dst->append(const_cast(reinterpret_cast(&value)), + sizeof(value)); #else char buf[sizeof(value)]; EncodeFixed64(buf, value);