diff --git a/HISTORY.md b/HISTORY.md index 7e516d85a..19a2dfc16 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,8 @@ # Rocksdb Change Log +## 5.15.3 (8/10/2018) +### Bug Fixes +* Fix a bug in misreporting the estimated partition index size in properties block. + ## 5.15.2 (8/9/2018) ### Bug Fixes * Return correct usable_size for BlockContents. diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index df094ffa2..e8db6a4bd 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -755,7 +755,8 @@ void BlockBasedTableBuilder::WritePropertiesBlock( rep_->props.filter_policy_name = rep_->table_options.filter_policy != nullptr ? rep_->table_options.filter_policy->Name() : ""; - rep_->props.index_size = rep_->index_builder->EstimatedSize() + kBlockTrailerSize; + rep_->props.index_size = + rep_->index_builder->IndexSize() + kBlockTrailerSize; rep_->props.comparator_name = rep_->ioptions.user_comparator != nullptr ? rep_->ioptions.user_comparator->Name() : "nullptr"; @@ -784,7 +785,7 @@ void BlockBasedTableBuilder::WritePropertiesBlock( assert(rep_->p_index_builder_ != nullptr); rep_->props.index_partitions = rep_->p_index_builder_->NumPartitions(); rep_->props.top_level_index_size = - rep_->p_index_builder_->EstimateTopLevelIndexSize(rep_->offset); + rep_->p_index_builder_->TopLevelIndexSize(rep_->offset); } rep_->props.index_key_is_user_key = !rep_->index_builder->seperator_is_key_plus_seq(); diff --git a/table/index_builder.cc b/table/index_builder.cc index ebabbeb8d..0d8d3b8e6 100644 --- a/table/index_builder.cc +++ b/table/index_builder.cc @@ -70,6 +70,12 @@ PartitionedIndexBuilder::PartitionedIndexBuilder( table_opt.format_version), sub_index_builder_(nullptr), table_opt_(table_opt), + // We start by false. After each partition we revise the value based on + // what the sub_index_builder has decided. If the feature is disabled + // entirely, this will be set to true after switching the first + // sub_index_builder. Otherwise, it could be set to true even one of the + // sub_index_builders could not safely exclude seq from the keys, then it + // wil be enforced on all sub_index_builders on ::Finish. seperator_is_key_plus_seq_(false) {} PartitionedIndexBuilder::~PartitionedIndexBuilder() { @@ -83,7 +89,11 @@ void PartitionedIndexBuilder::MakeNewSubIndexBuilder() { table_opt_.format_version); flush_policy_.reset(FlushBlockBySizePolicyFactory::NewFlushBlockPolicy( table_opt_.metadata_block_size, table_opt_.block_size_deviation, - sub_index_builder_->index_block_builder_)); + // Note: this is sub-optimal since sub_index_builder_ could later reset + // seperator_is_key_plus_seq_ but the probability of that is low. + sub_index_builder_->seperator_is_key_plus_seq_ + ? sub_index_builder_->index_block_builder_ + : sub_index_builder_->index_block_builder_without_seq_)); partition_cut_requested_ = false; } @@ -143,6 +153,9 @@ void PartitionedIndexBuilder::AddIndexEntry( Status PartitionedIndexBuilder::Finish( IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) { + if (partition_cnt_ == 0) { + partition_cnt_ = entries_.size(); + } // It must be set to null after last key is added assert(sub_index_builder_ == nullptr); if (finishing_indexes == true) { @@ -164,6 +177,8 @@ Status PartitionedIndexBuilder::Finish( index_blocks->index_block_contents = index_block_builder_without_seq_.Finish(); } + top_level_index_size_ = index_blocks->index_block_contents.size(); + index_size_ += top_level_index_size_; return Status::OK(); } else { // Finish the next partition index in line and Incomplete() to indicate we @@ -172,45 +187,13 @@ Status PartitionedIndexBuilder::Finish( // Apply the policy to all sub-indexes entry.value->seperator_is_key_plus_seq_ = seperator_is_key_plus_seq_; auto s = entry.value->Finish(index_blocks); + index_size_ += index_blocks->index_block_contents.size(); finishing_indexes = true; return s.ok() ? Status::Incomplete() : s; } } -// Estimate size excluding the top-level index -// It is assumed that this method is called before writing index partition -// starts -size_t PartitionedIndexBuilder::EstimatedSize() const { - size_t total = 0; - for (auto it = entries_.begin(); it != entries_.end(); ++it) { - total += it->value->EstimatedSize(); - } - total += - sub_index_builder_ == nullptr ? 0 : sub_index_builder_->EstimatedSize(); - return total; -} - -// Since when this method is called we do not know the index block offsets yet, -// the top-level index does not exist. Hence we estimate the block offsets and -// create a temporary top-level index. -size_t PartitionedIndexBuilder::EstimateTopLevelIndexSize( - uint64_t offset) const { - BlockBuilder tmp_builder( - table_opt_.index_block_restart_interval); // tmp top-level index builder - for (auto it = entries_.begin(); it != entries_.end(); ++it) { - std::string tmp_handle_encoding; - uint64_t size = it->value->EstimatedSize(); - BlockHandle tmp_block_handle(offset, size); - tmp_block_handle.EncodeTo(&tmp_handle_encoding); - tmp_builder.Add( - seperator_is_key_plus_seq_ ? it->key : ExtractUserKey(it->key), - tmp_handle_encoding); - offset += size; - } - return tmp_builder.CurrentSizeEstimate(); -} - size_t PartitionedIndexBuilder::NumPartitions() const { - return entries_.size(); + return partition_cnt_; } } // namespace rocksdb diff --git a/table/index_builder.h b/table/index_builder.h index e8c060716..d77194a8b 100644 --- a/table/index_builder.h +++ b/table/index_builder.h @@ -96,13 +96,15 @@ class IndexBuilder { virtual Status Finish(IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) = 0; - // Get the estimated size for index block. - virtual size_t EstimatedSize() const = 0; + // Get the size for index block. Must be called after ::Finish. + virtual size_t IndexSize() const = 0; virtual bool seperator_is_key_plus_seq() { return true; } protected: const InternalKeyComparator* comparator_; + // Set after ::Finish is called + size_t index_size_ = 0; }; // This index builder builds space-efficient index block. @@ -162,15 +164,12 @@ class ShortenedIndexBuilder : public IndexBuilder { index_blocks->index_block_contents = index_block_builder_without_seq_.Finish(); } + index_size_ = index_blocks->index_block_contents.size(); return Status::OK(); } - virtual size_t EstimatedSize() const override { - if (seperator_is_key_plus_seq_) { - return index_block_builder_.CurrentSizeEstimate(); - } else { - return index_block_builder_without_seq_.CurrentSizeEstimate(); - } + virtual size_t IndexSize() const override { + return index_size_; } virtual bool seperator_is_key_plus_seq() override { @@ -272,8 +271,8 @@ class HashIndexBuilder : public IndexBuilder { return Status::OK(); } - virtual size_t EstimatedSize() const override { - return primary_index_builder_.EstimatedSize() + prefix_block_.size() + + virtual size_t IndexSize() const override { + return primary_index_builder_.IndexSize() + prefix_block_.size() + prefix_meta_block_.size(); } @@ -338,8 +337,12 @@ class PartitionedIndexBuilder : public IndexBuilder { IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) override; - virtual size_t EstimatedSize() const override; - size_t EstimateTopLevelIndexSize(uint64_t) const; + virtual size_t IndexSize() const override { + return index_size_; + } + size_t TopLevelIndexSize(uint64_t) const { + return top_level_index_size_; + } size_t NumPartitions() const; inline bool ShouldCutFilterBlock() { @@ -362,6 +365,11 @@ class PartitionedIndexBuilder : public IndexBuilder { } private: + // Set after ::Finish is called + size_t top_level_index_size_ = 0; + // Set after ::Finish is called + size_t partition_cnt_ = 0; + void MakeNewSubIndexBuilder(); struct Entry {