rocksdb/table/block_based/partitioned_index_iterator.h

//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include "table/block_based/block_based_table_reader.h"

#include "table/block_based/block_based_table_reader_impl.h"
#include "table/block_based/block_prefetcher.h"
#include "table/block_based/reader_common.h"

namespace ROCKSDB_NAMESPACE {
// Iterator that iterates over partitioned index.
// Some upper and lower bound tricks played in block based table iterators
// could be played here, but it's too complicated to reason about index
// keys with upper or lower bound, so we skip it for simplicity.
class ParititionedIndexIterator : public InternalIteratorBase<IndexValue> {
 public:
  // Creates an iterator over a partitioned index.
  //
  // `read_options` is copied into this object and `index_iter` is moved into
  // it. `icomp` provides the user comparator; in debug builds a reference to
  // `icomp` itself is also kept, so it has to outlive the iterator there.
  // `caller` initializes the block cache lookup context and
  // `compaction_readahead_size` is handed to the block prefetcher.
  ParititionedIndexIterator(
      const BlockBasedTable* table, const ReadOptions& read_options,
      const InternalKeyComparator& icomp,
      std::unique_ptr<InternalIteratorBase<IndexValue>>&& index_iter,
      TableReaderCaller caller, size_t compaction_readahead_size = 0)
      : table_(table),
        read_options_(read_options),
#ifndef NDEBUG
        icomp_(icomp),
#endif
        user_comparator_(icomp.user_comparator()),
        index_iter_(std::move(index_iter)),
        block_iter_points_to_real_block_(false),
        lookup_context_(caller),
        block_prefetcher_(compaction_readahead_size) {}

  ~ParititionedIndexIterator() override {}

  // Positioning operations that are implemented out of line.
  void Seek(const Slice& target) override;
  void SeekToFirst() override;
  void SeekToLast() override;
  void Next() final override;
  void Prev() override;

  // Not expected to be called on this iterator; trips an assertion in debug
  // builds and is a no-op otherwise.
  void SeekForPrev(const Slice&) override { assert(false); }

  // Not expected to be called on this iterator; trips an assertion in debug
  // builds and reports false otherwise.
  bool NextAndGetResult(IterateResult*) override {
    assert(false);
    return false;
  }

  // The iterator is positioned on an entry only while block_iter_ refers to
  // a real block and is itself valid.
  bool Valid() const override {
    if (!block_iter_points_to_real_block_) {
      return false;
    }
    return block_iter_.Valid();
  }

  // Accessors for the current entry. They forward to block_iter_ and require
  // Valid().
  Slice key() const override {
    assert(Valid());
    return block_iter_.key();
  }
  Slice user_key() const override {
    assert(Valid());
    return block_iter_.user_key();
  }
  IndexValue value() const override {
    assert(Valid());
    return block_iter_.value();
  }

  // Reports the first error found, checking the index iterator before the
  // block iterator. Returns OK when neither has an error to report.
  Status status() const override {
    // NotFound from the index iterator is deliberately not surfaced: the
    // prefix index sets its status to NotFound when the prefix does not exist.
    Status index_status = index_iter_->status();
    if (!index_status.ok() && !index_status.IsNotFound()) {
      return index_status;
    }
    // block_iter_ is only consulted while it refers to a real block.
    if (!block_iter_points_to_real_block_) {
      return Status::OK();
    }
    return block_iter_.status();
  }

  // Whether the iterator was invalidated for being out of bound.
  // Shouldn't be called; asserts in debug builds and returns false otherwise.
  bool IsOutOfBound() override {
    assert(false);
    return false;
  }

  // Shouldn't be called; asserts in debug builds and returns true otherwise.
  bool MayBeOutOfUpperBound() override {
    assert(false);
    return true;
  }

  // Shouldn't be called; asserts in debug builds and is a no-op otherwise.
  void SetPinnedItersMgr(PinnedIteratorsManager*) override { assert(false); }

  // Shouldn't be called; asserts in debug builds and returns false otherwise.
  bool IsKeyPinned() const override {
    assert(false);
    return false;
  }

  // Shouldn't be called; asserts in debug builds and returns false otherwise.
  bool IsValuePinned() const override {
    assert(false);
    return false;
  }

  // Detaches block_iter_ from its current block, if it has one, leaving it
  // invalidated with an OK status.
  void ResetPartitionedIndexIter() {
    if (!block_iter_points_to_real_block_) {
      return;
    }
    block_iter_.Invalidate(Status::OK());
    block_iter_points_to_real_block_ = false;
  }

  // Records the offset of the block handle the index iterator currently
  // points at. Nothing is recorded unless block_iter_ refers to a real block.
  void SavePrevIndexValue() {
    if (!block_iter_points_to_real_block_) {
      return;
    }
    // Kept for reseeks: if a reseek ends up on the same block, that block
    // shouldn't be fetched again.
    prev_block_offset_ = index_iter_->value().handle.offset();
  }

 private:
  friend class BlockBasedTableReaderTestVerifyChecksum_ChecksumMismatch_Test;

  const BlockBasedTable* table_;
  // Private copy of the options passed to the constructor.
  const ReadOptions read_options_;
#ifndef NDEBUG
  // Only present in debug builds.
  const InternalKeyComparator& icomp_;
#endif
  UserComparatorWrapper user_comparator_;
  std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter_;
  IndexBlockIter block_iter_;

  // True if block_iter_ is initialized and points to the same block
  // as index iterator.
  bool block_iter_points_to_real_block_;
  // Offset saved by SavePrevIndexValue(); starts at the maximum uint64_t
  // value until an offset has been saved.
  uint64_t prev_block_offset_ = std::numeric_limits<uint64_t>::max();
  BlockCacheLookupContext lookup_context_;
  BlockPrefetcher block_prefetcher_;

  // If `target` is null, seek to first.
  void SeekImpl(const Slice* target);

  // Helpers that are implemented out of line.
  void InitPartitionedIndexBlock();
  void FindKeyForward();
  void FindBlockForward();
  void FindKeyBackward();
};
}  // namespace ROCKSDB_NAMESPACE
De-template block based table iterator (#6531) Summary: Right now block based table iterator is used both for iterating data for block based table, and as the index iterator for partitioned index. This was initially convenient for introducing a new iterator and block type for new index format, while reducing code change. However, these two usages don't go with each other very well. For example, Prev() is never called for partitioned index iterator, and some other complexity is maintained in block based iterators, which is not needed for index iterator but maintainers will always need to reason about it. Furthermore, the template usage is not following Google C++ Style which we are following, and makes a large chunk of code tangled together. This commit separates the two iterators. Right now, here is what is done: 1. Copy the block based iterator code into partitioned index iterator, and de-template them. 2. Remove some code not needed for partitioned index. The upper bound check and tricks are removed. We never tested performance for those tricks when partitioned index is enabled in the first place. It's unlikely to generate performance regression, as creating new partitioned index block is much rarer than data blocks. 3. Separate out the prefetch logic to a helper class and both classes call them. This commit will enable future follow-ups. One direction is that we might separate index iterator interface for data blocks and index blocks, as they are quite different. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6531 Test Plan: build using make and cmake. And build release Differential Revision: D20473108 fbshipit-source-id: e48011783b339a4257c204cc07507b171b834b0f 2020-03-16 20:17:34 +01:00			`// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.`
			`// This source code is licensed under both the GPLv2 (found in the`
			`// COPYING file in the root directory) and Apache 2.0 License`
			`// (found in the LICENSE.Apache file in the root directory).`
			`//`
			`// Copyright (c) 2011 The LevelDB Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style license that can be`
			`// found in the LICENSE file. See the AUTHORS file for names of contributors.`
			`#pragma once`
			`#include "table/block_based/block_based_table_reader.h"`

			`#include "table/block_based/block_based_table_reader_impl.h"`
			`#include "table/block_based/block_prefetcher.h"`
			`#include "table/block_based/reader_common.h"`

			`namespace ROCKSDB_NAMESPACE {`
			`// Iterator that iterates over partitioned index.`
			`// Some upper and lower bound tricks played in block based table iterators`
			`// could be played here, but it's too complicated to reason about index`
			`// keys with upper or lower bound, so we skip it for simplicity.`
			`class ParititionedIndexIterator : public InternalIteratorBase<IndexValue> {`
			`// compaction_readahead_size: its value will only be used if for_compaction =`
			`// true`
			`public:`
			`ParititionedIndexIterator(`
			`const BlockBasedTable* table, const ReadOptions& read_options,`
			`const InternalKeyComparator& icomp,`
			`std::unique_ptr<InternalIteratorBase<IndexValue>>&& index_iter,`
			`TableReaderCaller caller, size_t compaction_readahead_size = 0)`
			`: table_(table),`
			`read_options_(read_options),`
Fix compiler warning treated as error (#6547) Summary: Define a private member variable only in debug mode. Without fix, build will fail ``` In file included from table/block_based/partitioned_index_iterator.cc:9: ./table/block_based/partitioned_index_iterator.h:125:32: error: private field 'icomp_' is not used [-Werror,-Wunused-private-field] const InternalKeyComparator& icomp_; ``` Test plan (dev server) 1. make check 2. Make sure fixed in Travis Pull Request resolved: https://github.com/facebook/rocksdb/pull/6547 Reviewed By: siying Differential Revision: D20480027 Pulled By: pdillinger fbshipit-source-id: 288bc94280e240c3136335b6c73eb1ccb0db459d 2020-03-17 17:56:15 +01:00			`#ifndef NDEBUG`
De-template block based table iterator (#6531) Summary: Right now block based table iterator is used both for iterating data for block based table, and as the index iterator for partitioned index. This was initially convenient for introducing a new iterator and block type for new index format, while reducing code change. However, these two usages don't go with each other very well. For example, Prev() is never called for partitioned index iterator, and some other complexity is maintained in block based iterators, which is not needed for index iterator but maintainers will always need to reason about it. Furthermore, the template usage is not following Google C++ Style which we are following, and makes a large chunk of code tangled together. This commit separates the two iterators. Right now, here is what is done: 1. Copy the block based iterator code into partitioned index iterator, and de-template them. 2. Remove some code not needed for partitioned index. The upper bound check and tricks are removed. We never tested performance for those tricks when partitioned index is enabled in the first place. It's unlikely to generate performance regression, as creating new partitioned index block is much rarer than data blocks. 3. Separate out the prefetch logic to a helper class and both classes call them. This commit will enable future follow-ups. One direction is that we might separate index iterator interface for data blocks and index blocks, as they are quite different. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6531 Test Plan: build using make and cmake. And build release Differential Revision: D20473108 fbshipit-source-id: e48011783b339a4257c204cc07507b171b834b0f 2020-03-16 20:17:34 +01:00			`icomp_(icomp),`
Fix compiler warning treated as error (#6547) Summary: Define a private member variable only in debug mode. Without fix, build will fail ``` In file included from table/block_based/partitioned_index_iterator.cc:9: ./table/block_based/partitioned_index_iterator.h:125:32: error: private field 'icomp_' is not used [-Werror,-Wunused-private-field] const InternalKeyComparator& icomp_; ``` Test plan (dev server) 1. make check 2. Make sure fixed in Travis Pull Request resolved: https://github.com/facebook/rocksdb/pull/6547 Reviewed By: siying Differential Revision: D20480027 Pulled By: pdillinger fbshipit-source-id: 288bc94280e240c3136335b6c73eb1ccb0db459d 2020-03-17 17:56:15 +01:00			`#endif`
De-template block based table iterator (#6531) Summary: Right now block based table iterator is used both for iterating data for block based table, and as the index iterator for partitioned index. This was initially convenient for introducing a new iterator and block type for new index format, while reducing code change. However, these two usages don't go with each other very well. For example, Prev() is never called for partitioned index iterator, and some other complexity is maintained in block based iterators, which is not needed for index iterator but maintainers will always need to reason about it. Furthermore, the template usage is not following Google C++ Style which we are following, and makes a large chunk of code tangled together. This commit separates the two iterators. Right now, here is what is done: 1. Copy the block based iterator code into partitioned index iterator, and de-template them. 2. Remove some code not needed for partitioned index. The upper bound check and tricks are removed. We never tested performance for those tricks when partitioned index is enabled in the first place. It's unlikely to generate performance regression, as creating new partitioned index block is much rarer than data blocks. 3. Separate out the prefetch logic to a helper class and both classes call them. This commit will enable future follow-ups. One direction is that we might separate index iterator interface for data blocks and index blocks, as they are quite different. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6531 Test Plan: build using make and cmake. And build release Differential Revision: D20473108 fbshipit-source-id: e48011783b339a4257c204cc07507b171b834b0f 2020-03-16 20:17:34 +01:00			`user_comparator_(icomp.user_comparator()),`
			`index_iter_(std::move(index_iter)),`
			`block_iter_points_to_real_block_(false),`
			`lookup_context_(caller),`
			`block_prefetcher_(compaction_readahead_size) {}`

			`~ParititionedIndexIterator() {}`

			`void Seek(const Slice& target) override;`
			`void SeekForPrev(const Slice&) override {`
			`// Shouldn't be called.`
			`assert(false);`
			`}`
			`void SeekToFirst() override;`
			`void SeekToLast() override;`
			`void Next() final override;`
			`bool NextAndGetResult(IterateResult*) override {`
			`assert(false);`
			`return false;`
			`}`
			`void Prev() override;`
			`bool Valid() const override {`
			`return block_iter_points_to_real_block_ && block_iter_.Valid();`
			`}`
			`Slice key() const override {`
			`assert(Valid());`
			`return block_iter_.key();`
			`}`
			`Slice user_key() const override {`
			`assert(Valid());`
			`return block_iter_.user_key();`
			`}`
			`IndexValue value() const override {`
			`assert(Valid());`
			`return block_iter_.value();`
			`}`
			`Status status() const override {`
			`// Prefix index set status to NotFound when the prefix does not exist`
			`if (!index_iter_->status().ok() && !index_iter_->status().IsNotFound()) {`
			`return index_iter_->status();`
			`} else if (block_iter_points_to_real_block_) {`
			`return block_iter_.status();`
			`} else {`
			`return Status::OK();`
			`}`
			`}`

			`// Whether iterator invalidated for being out of bound.`
			`bool IsOutOfBound() override {`
			`// Shoulldn't be called`
			`assert(false);`
			`return false;`
			`}`

			`inline bool MayBeOutOfUpperBound() override {`
			`// Shouldn't be called.`
			`assert(false);`
			`return true;`
			`}`
			`void SetPinnedItersMgr(PinnedIteratorsManager*) override {`
			`// Shouldn't be called.`
			`assert(false);`
			`}`
			`bool IsKeyPinned() const override {`
			`// Shouldn't be called.`
			`assert(false);`
			`return false;`
			`}`
			`bool IsValuePinned() const override {`
			`// Shouldn't be called.`
			`assert(false);`
			`return false;`
			`}`

			`void ResetPartitionedIndexIter() {`
			`if (block_iter_points_to_real_block_) {`
			`block_iter_.Invalidate(Status::OK());`
			`block_iter_points_to_real_block_ = false;`
			`}`
			`}`

			`void SavePrevIndexValue() {`
			`if (block_iter_points_to_real_block_) {`
			`// Reseek. If they end up with the same data block, we shouldn't re-fetch`
			`// the same data block.`
			`prev_block_offset_ = index_iter_->value().handle.offset();`
			`}`
			`}`

			`private:`
Check iterator status BlockBasedTableReader::VerifyChecksumInBlocks() (#6909) Summary: The ```for``` loop in ```VerifyChecksumInBlocks``` only checks ```index_iter->Valid()``` which could be ```false``` either due to reaching the end of the index or, in case of partitioned index, it could be due to a checksum mismatch error when reading a 2nd level index block. Instead of throwing away the index iterator status, we need to return any errors back to the caller. Tests: Add a test in block_based_table_reader_test.cc. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6909 Reviewed By: pdillinger Differential Revision: D21833922 Pulled By: anand1976 fbshipit-source-id: bc778ebf1121dbbdd768689de5183f07a9f0beae 2020-06-05 20:06:26 +02:00			`friend class BlockBasedTableReaderTestVerifyChecksum_ChecksumMismatch_Test;`
De-template block based table iterator (#6531) Summary: Right now block based table iterator is used both for iterating data for block based table, and as the index iterator for partitioned index. This was initially convenient for introducing a new iterator and block type for new index format, while reducing code change. However, these two usages don't go with each other very well. For example, Prev() is never called for partitioned index iterator, and some other complexity is maintained in block based iterators, which is not needed for index iterator but maintainers will always need to reason about it. Furthermore, the template usage is not following Google C++ Style which we are following, and makes a large chunk of code tangled together. This commit separates the two iterators. Right now, here is what is done: 1. Copy the block based iterator code into partitioned index iterator, and de-template them. 2. Remove some code not needed for partitioned index. The upper bound check and tricks are removed. We never tested performance for those tricks when partitioned index is enabled in the first place. It's unlikely to generate performance regression, as creating new partitioned index block is much rarer than data blocks. 3. Separate out the prefetch logic to a helper class and both classes call them. This commit will enable future follow-ups. One direction is that we might separate index iterator interface for data blocks and index blocks, as they are quite different. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6531 Test Plan: build using make and cmake. And build release Differential Revision: D20473108 fbshipit-source-id: e48011783b339a4257c204cc07507b171b834b0f 2020-03-16 20:17:34 +01:00			`const BlockBasedTable* table_;`
			`const ReadOptions read_options_;`
Fix compiler warning treated as error (#6547) Summary: Define a private member variable only in debug mode. Without fix, build will fail ``` In file included from table/block_based/partitioned_index_iterator.cc:9: ./table/block_based/partitioned_index_iterator.h:125:32: error: private field 'icomp_' is not used [-Werror,-Wunused-private-field] const InternalKeyComparator& icomp_; ``` Test plan (dev server) 1. make check 2. Make sure fixed in Travis Pull Request resolved: https://github.com/facebook/rocksdb/pull/6547 Reviewed By: siying Differential Revision: D20480027 Pulled By: pdillinger fbshipit-source-id: 288bc94280e240c3136335b6c73eb1ccb0db459d 2020-03-17 17:56:15 +01:00			`#ifndef NDEBUG`
De-template block based table iterator (#6531) Summary: Right now block based table iterator is used both for iterating data for block based table, and as the index iterator for partitioned index. This was initially convenient for introducing a new iterator and block type for new index format, while reducing code change. However, these two usages don't go with each other very well. For example, Prev() is never called for partitioned index iterator, and some other complexity is maintained in block based iterators, which is not needed for index iterator but maintainers will always need to reason about it. Furthermore, the template usage is not following Google C++ Style which we are following, and makes a large chunk of code tangled together. This commit separates the two iterators. Right now, here is what is done: 1. Copy the block based iterator code into partitioned index iterator, and de-template them. 2. Remove some code not needed for partitioned index. The upper bound check and tricks are removed. We never tested performance for those tricks when partitioned index is enabled in the first place. It's unlikely to generate performance regression, as creating new partitioned index block is much rarer than data blocks. 3. Separate out the prefetch logic to a helper class and both classes call them. This commit will enable future follow-ups. One direction is that we might separate index iterator interface for data blocks and index blocks, as they are quite different. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6531 Test Plan: build using make and cmake. And build release Differential Revision: D20473108 fbshipit-source-id: e48011783b339a4257c204cc07507b171b834b0f 2020-03-16 20:17:34 +01:00			`const InternalKeyComparator& icomp_;`
Fix compiler warning treated as error (#6547) Summary: Define a private member variable only in debug mode. Without fix, build will fail ``` In file included from table/block_based/partitioned_index_iterator.cc:9: ./table/block_based/partitioned_index_iterator.h:125:32: error: private field 'icomp_' is not used [-Werror,-Wunused-private-field] const InternalKeyComparator& icomp_; ``` Test plan (dev server) 1. make check 2. Make sure fixed in Travis Pull Request resolved: https://github.com/facebook/rocksdb/pull/6547 Reviewed By: siying Differential Revision: D20480027 Pulled By: pdillinger fbshipit-source-id: 288bc94280e240c3136335b6c73eb1ccb0db459d 2020-03-17 17:56:15 +01:00			`#endif`
De-template block based table iterator (#6531) Summary: Right now block based table iterator is used both for iterating data for block based table, and as the index iterator for partitioned index. This was initially convenient for introducing a new iterator and block type for new index format, while reducing code change. However, these two usages don't go with each other very well. For example, Prev() is never called for partitioned index iterator, and some other complexity is maintained in block based iterators, which is not needed for index iterator but maintainers will always need to reason about it. Furthermore, the template usage is not following Google C++ Style which we are following, and makes a large chunk of code tangled together. This commit separates the two iterators. Right now, here is what is done: 1. Copy the block based iterator code into partitioned index iterator, and de-template them. 2. Remove some code not needed for partitioned index. The upper bound check and tricks are removed. We never tested performance for those tricks when partitioned index is enabled in the first place. It's unlikely to generate performance regression, as creating new partitioned index block is much rarer than data blocks. 3. Separate out the prefetch logic to a helper class and both classes call them. This commit will enable future follow-ups. One direction is that we might separate index iterator interface for data blocks and index blocks, as they are quite different. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6531 Test Plan: build using make and cmake. And build release Differential Revision: D20473108 fbshipit-source-id: e48011783b339a4257c204cc07507b171b834b0f 2020-03-16 20:17:34 +01:00			`UserComparatorWrapper user_comparator_;`
			`std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter_;`
			`IndexBlockIter block_iter_;`

			`// True if block_iter_ is initialized and points to the same block`
			`// as index iterator.`
			`bool block_iter_points_to_real_block_;`
			`uint64_t prev_block_offset_ = std::numeric_limits<uint64_t>::max();`
			`BlockCacheLookupContext lookup_context_;`
			`BlockPrefetcher block_prefetcher_;`

			// If `target` is null, seek to first.
			`void SeekImpl(const Slice* target);`

			`void InitPartitionedIndexBlock();`
			`void FindKeyForward();`
			`void FindBlockForward();`
			`void FindKeyBackward();`
			`};`
			`} // namespace ROCKSDB_NAMESPACE`