Added SizeApproximationOptions to DB::GetApproximateSizes (#5626)
Summary: Adds a new DB::GetApproximateSizes overload that takes a SizeApproximationOptions argument, which allows adding more options/knobs to the DB::GetApproximateSizes call (beyond only the include_flags). Pull Request resolved: https://github.com/facebook/rocksdb/pull/5626 Differential Revision: D16496913 Pulled By: elipoz fbshipit-source-id: ee8c6c182330a285fa056ecfc3905a592b451720
This commit is contained in:
parent
ae152ee666
commit
9625a2bc2b
@ -21,6 +21,7 @@
|
|||||||
* Added new APIs ExportColumnFamily() and CreateColumnFamilyWithImport() to support export and import of a Column Family. https://github.com/facebook/rocksdb/issues/3469
|
* Added new APIs ExportColumnFamily() and CreateColumnFamilyWithImport() to support export and import of a Column Family. https://github.com/facebook/rocksdb/issues/3469
|
||||||
* ldb sometimes uses a string-append merge operator if no merge operator is passed in. This is to allow users to print keys from a DB with a merge operator.
|
* ldb sometimes uses a string-append merge operator if no merge operator is passed in. This is to allow users to print keys from a DB with a merge operator.
|
||||||
* Replaces old Registra with ObjectRegistry to allow user to create custom object from string, also add LoadEnv() to Env.
|
* Replaces old Registra with ObjectRegistry to allow user to create custom object from string, also add LoadEnv() to Env.
|
||||||
|
* Added a new overload of GetApproximateSizes which takes a SizeApproximationOptions object and returns a Status. The older overloads now redirect their calls to this new method and no longer assert if include_flags doesn't have either of the INCLUDE_MEMTABLES or INCLUDE_FILES bits set. It's recommended to use only the new method, as it is more type safe and returns a meaningful status in case of errors.
|
||||||
|
|
||||||
### New Features
|
### New Features
|
||||||
* Add an option `snap_refresh_nanos` (default to 0) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature.
|
* Add an option `snap_refresh_nanos` (default to 0) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature.
|
||||||
@ -29,6 +30,7 @@
|
|||||||
* Add an option `failed_move_fall_back_to_copy` (default is true) for external SST ingestion. When `move_files` is true and hard link fails, ingestion falls back to copy if `failed_move_fall_back_to_copy` is true. Otherwise, ingestion reports an error.
|
* Add an option `failed_move_fall_back_to_copy` (default is true) for external SST ingestion. When `move_files` is true and hard link fails, ingestion falls back to copy if `failed_move_fall_back_to_copy` is true. Otherwise, ingestion reports an error.
|
||||||
* Add argument `--secondary_path` to ldb to open the database as the secondary instance. This would keep the original DB intact.
|
* Add argument `--secondary_path` to ldb to open the database as the secondary instance. This would keep the original DB intact.
|
||||||
* Compression dictionary blocks are now prefetched and pinned in the cache (based on the customer's settings) the same way as index and filter blocks.
|
* Compression dictionary blocks are now prefetched and pinned in the cache (based on the customer's settings) the same way as index and filter blocks.
|
||||||
|
* Added DBOptions::log_readahead_size which specifies the number of bytes to prefetch when reading the log. This is mostly useful for reading a remotely located log, as it can reduce the number of round-trips. If 0 (default), then the prefetching is disabled.
|
||||||
|
|
||||||
### Performance Improvements
|
### Performance Improvements
|
||||||
* Reduce binary search when iterator reseek into the same data block.
|
* Reduce binary search when iterator reseek into the same data block.
|
||||||
|
@ -2770,11 +2770,13 @@ void DBImpl::GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
|
|||||||
ReturnAndCleanupSuperVersion(cfd, sv);
|
ReturnAndCleanupSuperVersion(cfd, sv);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family,
|
Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options,
|
||||||
const Range* range, int n, uint64_t* sizes,
|
ColumnFamilyHandle* column_family,
|
||||||
uint8_t include_flags) {
|
const Range* range, int n, uint64_t* sizes) {
|
||||||
assert(include_flags & DB::SizeApproximationFlags::INCLUDE_FILES ||
|
if (!options.include_memtabtles && !options.include_files) {
|
||||||
include_flags & DB::SizeApproximationFlags::INCLUDE_MEMTABLES);
|
return Status::InvalidArgument("Invalid options");
|
||||||
|
}
|
||||||
|
|
||||||
Version* v;
|
Version* v;
|
||||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
||||||
auto cfd = cfh->cfd();
|
auto cfd = cfh->cfd();
|
||||||
@ -2786,18 +2788,19 @@ void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family,
|
|||||||
InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
|
InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
|
||||||
InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
|
InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
|
||||||
sizes[i] = 0;
|
sizes[i] = 0;
|
||||||
if (include_flags & DB::SizeApproximationFlags::INCLUDE_FILES) {
|
if (options.include_files) {
|
||||||
sizes[i] += versions_->ApproximateSize(
|
sizes[i] += versions_->ApproximateSize(
|
||||||
v, k1.Encode(), k2.Encode(), /*start_level=*/0, /*end_level=*/-1,
|
v, k1.Encode(), k2.Encode(), /*start_level=*/0, /*end_level=*/-1,
|
||||||
TableReaderCaller::kUserApproximateSize);
|
TableReaderCaller::kUserApproximateSize);
|
||||||
}
|
}
|
||||||
if (include_flags & DB::SizeApproximationFlags::INCLUDE_MEMTABLES) {
|
if (options.include_memtabtles) {
|
||||||
sizes[i] += sv->mem->ApproximateStats(k1.Encode(), k2.Encode()).size;
|
sizes[i] += sv->mem->ApproximateStats(k1.Encode(), k2.Encode()).size;
|
||||||
sizes[i] += sv->imm->ApproximateStats(k1.Encode(), k2.Encode()).size;
|
sizes[i] += sv->imm->ApproximateStats(k1.Encode(), k2.Encode()).size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ReturnAndCleanupSuperVersion(cfd, sv);
|
ReturnAndCleanupSuperVersion(cfd, sv);
|
||||||
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::list<uint64_t>::iterator
|
std::list<uint64_t>::iterator
|
||||||
|
@ -233,9 +233,10 @@ class DBImpl : public DB {
|
|||||||
virtual bool GetAggregatedIntProperty(const Slice& property,
|
virtual bool GetAggregatedIntProperty(const Slice& property,
|
||||||
uint64_t* aggregated_value) override;
|
uint64_t* aggregated_value) override;
|
||||||
using DB::GetApproximateSizes;
|
using DB::GetApproximateSizes;
|
||||||
virtual void GetApproximateSizes(
|
virtual Status GetApproximateSizes(const SizeApproximationOptions& options,
|
||||||
ColumnFamilyHandle* column_family, const Range* range, int n,
|
ColumnFamilyHandle* column_family,
|
||||||
uint64_t* sizes, uint8_t include_flags = INCLUDE_FILES) override;
|
const Range* range, int n,
|
||||||
|
uint64_t* sizes) override;
|
||||||
using DB::GetApproximateMemTableStats;
|
using DB::GetApproximateMemTableStats;
|
||||||
virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
|
virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
|
||||||
const Range& range,
|
const Range& range,
|
||||||
|
@ -2598,13 +2598,14 @@ class ModelDB : public DB {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
using DB::GetApproximateSizes;
|
using DB::GetApproximateSizes;
|
||||||
void GetApproximateSizes(ColumnFamilyHandle* /*column_family*/,
|
Status GetApproximateSizes(const SizeApproximationOptions& /*options*/,
|
||||||
const Range* /*range*/, int n, uint64_t* sizes,
|
ColumnFamilyHandle* /*column_family*/,
|
||||||
uint8_t /*include_flags*/
|
const Range* /*range*/, int n,
|
||||||
= INCLUDE_FILES) override {
|
uint64_t* sizes) override {
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
sizes[i] = 0;
|
sizes[i] = 0;
|
||||||
}
|
}
|
||||||
|
return Status::OK();
|
||||||
}
|
}
|
||||||
using DB::GetApproximateMemTableStats;
|
using DB::GetApproximateMemTableStats;
|
||||||
void GetApproximateMemTableStats(ColumnFamilyHandle* /*column_family*/,
|
void GetApproximateMemTableStats(ColumnFamilyHandle* /*column_family*/,
|
||||||
|
@ -808,7 +808,7 @@ class DB {
|
|||||||
// stats should be included, or file stats approximation or both
|
// stats should be included, or file stats approximation or both
|
||||||
enum SizeApproximationFlags : uint8_t {
|
enum SizeApproximationFlags : uint8_t {
|
||||||
NONE = 0,
|
NONE = 0,
|
||||||
INCLUDE_MEMTABLES = 1,
|
INCLUDE_MEMTABLES = 1 << 0,
|
||||||
INCLUDE_FILES = 1 << 1
|
INCLUDE_FILES = 1 << 1
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -818,14 +818,24 @@ class DB {
|
|||||||
// Note that the returned sizes measure file system space usage, so
|
// Note that the returned sizes measure file system space usage, so
|
||||||
// if the user data compresses by a factor of ten, the returned
|
// if the user data compresses by a factor of ten, the returned
|
||||||
// sizes will be one-tenth the size of the corresponding user data size.
|
// sizes will be one-tenth the size of the corresponding user data size.
|
||||||
//
|
virtual Status GetApproximateSizes(const SizeApproximationOptions& options,
|
||||||
// If include_flags defines whether the returned size should include
|
ColumnFamilyHandle* column_family,
|
||||||
// the recently written data in the mem-tables (if
|
const Range* range, int n,
|
||||||
// the mem-table type supports it), data serialized to disk, or both.
|
uint64_t* sizes) = 0;
|
||||||
// include_flags should be of type DB::SizeApproximationFlags
|
|
||||||
|
// Simpler versions of the GetApproximateSizes() method above.
|
||||||
|
// The include_flags argument must be of type DB::SizeApproximationFlags
|
||||||
|
// and can not be NONE.
|
||||||
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
|
virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
|
||||||
const Range* range, int n, uint64_t* sizes,
|
const Range* range, int n, uint64_t* sizes,
|
||||||
uint8_t include_flags = INCLUDE_FILES) = 0;
|
uint8_t include_flags = INCLUDE_FILES) {
|
||||||
|
SizeApproximationOptions options;
|
||||||
|
options.include_memtabtles =
|
||||||
|
(include_flags & SizeApproximationFlags::INCLUDE_MEMTABLES) != 0;
|
||||||
|
options.include_files =
|
||||||
|
(include_flags & SizeApproximationFlags::INCLUDE_FILES) != 0;
|
||||||
|
GetApproximateSizes(options, column_family, range, n, sizes);
|
||||||
|
}
|
||||||
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes,
|
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes,
|
||||||
uint8_t include_flags = INCLUDE_FILES) {
|
uint8_t include_flags = INCLUDE_FILES) {
|
||||||
GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
|
GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
|
||||||
|
@ -1093,10 +1093,6 @@ struct DBOptions {
|
|||||||
// The number of bytes to prefetch when reading the log. This is mostly useful
|
// The number of bytes to prefetch when reading the log. This is mostly useful
|
||||||
// for reading a remotely located log, as it can save the number of
|
// for reading a remotely located log, as it can save the number of
|
||||||
// round-trips. If 0, then the prefetching is disabled.
|
// round-trips. If 0, then the prefetching is disabled.
|
||||||
|
|
||||||
// If non-zero, we perform bigger reads when reading the log.
|
|
||||||
// This is mostly useful for reading a remotely located log, as it can save
|
|
||||||
// the number of round-trips. If 0, then the prefetching is disabled.
|
|
||||||
//
|
//
|
||||||
// Default: 0
|
// Default: 0
|
||||||
size_t log_readahead_size = 0;
|
size_t log_readahead_size = 0;
|
||||||
@ -1510,4 +1506,14 @@ struct ImportColumnFamilyOptions {
|
|||||||
bool move_files = false;
|
bool move_files = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Options used with DB::GetApproximateSizes()
|
||||||
|
struct SizeApproximationOptions {
|
||||||
|
// Defines whether the returned size should include the recently written
|
||||||
|
// data in the mem-tables. If set to false, include_files must be true.
|
||||||
|
bool include_memtabtles = false;
|
||||||
|
// Defines whether the returned size should include data serialized to disk.
|
||||||
|
// If set to false, include_memtabtles must be true.
|
||||||
|
bool include_files = true;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -209,10 +209,11 @@ class StackableDB : public DB {
|
|||||||
}
|
}
|
||||||
|
|
||||||
using DB::GetApproximateSizes;
|
using DB::GetApproximateSizes;
|
||||||
virtual void GetApproximateSizes(
|
virtual Status GetApproximateSizes(const SizeApproximationOptions& options,
|
||||||
ColumnFamilyHandle* column_family, const Range* r, int n, uint64_t* sizes,
|
ColumnFamilyHandle* column_family,
|
||||||
uint8_t include_flags = INCLUDE_FILES) override {
|
const Range* r, int n,
|
||||||
return db_->GetApproximateSizes(column_family, r, n, sizes, include_flags);
|
uint64_t* sizes) override {
|
||||||
|
return db_->GetApproximateSizes(options, column_family, r, n, sizes);
|
||||||
}
|
}
|
||||||
|
|
||||||
using DB::GetApproximateMemTableStats;
|
using DB::GetApproximateMemTableStats;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user