2016-02-10 00:12:00 +01:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-16 01:03:42 +02:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2014-10-31 16:48:19 +01:00
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
#include <cstring>
|
2020-06-12 18:52:14 +02:00
|
|
|
#include <iomanip>
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
#include <memory>
|
2020-06-12 18:52:14 +02:00
|
|
|
#include <sstream>
|
2014-10-31 16:48:19 +01:00
|
|
|
#include <string>
|
2020-06-12 18:52:14 +02:00
|
|
|
|
2014-10-31 16:48:19 +01:00
|
|
|
#include "db/version_edit.h"
|
|
|
|
#include "db/version_set.h"
|
2021-12-03 23:42:05 +01:00
|
|
|
#include "rocksdb/advanced_options.h"
|
2019-05-30 20:21:38 +02:00
|
|
|
#include "test_util/testharness.h"
|
|
|
|
#include "test_util/testutil.h"
|
2019-05-31 02:39:43 +02:00
|
|
|
#include "util/string_util.h"
|
2014-10-31 16:48:19 +01:00
|
|
|
|
2020-02-20 21:07:53 +01:00
|
|
|
namespace ROCKSDB_NAMESPACE {
|
2014-10-31 16:48:19 +01:00
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
class VersionBuilderTest : public testing::Test {
|
2014-10-31 16:48:19 +01:00
|
|
|
public:
|
2014-11-11 23:28:18 +01:00
|
|
|
const Comparator* ucmp_;
|
|
|
|
InternalKeyComparator icmp_;
|
|
|
|
Options options_;
|
2021-05-05 22:59:21 +02:00
|
|
|
ImmutableOptions ioptions_;
|
2014-11-11 23:28:18 +01:00
|
|
|
MutableCFOptions mutable_cf_options_;
|
|
|
|
VersionStorageInfo vstorage_;
|
|
|
|
uint32_t file_num_;
|
|
|
|
CompactionOptionsFIFO fifo_options_;
|
|
|
|
std::vector<uint64_t> size_being_compacted_;
|
2014-10-31 16:48:19 +01:00
|
|
|
|
|
|
|
VersionBuilderTest()
|
2014-11-11 23:28:18 +01:00
|
|
|
: ucmp_(BytewiseComparator()),
|
|
|
|
icmp_(ucmp_),
|
|
|
|
ioptions_(options_),
|
2016-09-14 06:11:59 +02:00
|
|
|
mutable_cf_options_(options_),
|
2014-11-11 23:28:18 +01:00
|
|
|
vstorage_(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel,
|
2016-10-08 02:21:45 +02:00
|
|
|
nullptr, false),
|
2014-11-11 23:28:18 +01:00
|
|
|
file_num_(1) {
|
|
|
|
mutable_cf_options_.RefreshDerivedOptions(ioptions_);
|
|
|
|
size_being_compacted_.resize(options_.num_levels);
|
2014-10-31 16:48:19 +01:00
|
|
|
}
|
|
|
|
|
2019-02-19 22:36:04 +01:00
|
|
|
~VersionBuilderTest() override {
|
2014-11-11 23:28:18 +01:00
|
|
|
for (int i = 0; i < vstorage_.num_levels(); i++) {
|
|
|
|
for (auto* f : vstorage_.LevelFiles(i)) {
|
2014-10-31 16:48:19 +01:00
|
|
|
if (--f->refs == 0) {
|
|
|
|
delete f;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
InternalKey GetInternalKey(const char* ukey,
|
|
|
|
SequenceNumber smallest_seq = 100) {
|
|
|
|
return InternalKey(ukey, smallest_seq, kTypeValue);
|
|
|
|
}
|
|
|
|
|
2020-06-03 20:21:16 +02:00
|
|
|
void Add(int level, uint64_t file_number, const char* smallest,
|
2014-10-31 16:48:19 +01:00
|
|
|
const char* largest, uint64_t file_size = 0, uint32_t path_id = 0,
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100,
|
2014-11-11 23:28:18 +01:00
|
|
|
uint64_t num_entries = 0, uint64_t num_deletions = 0,
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
bool sampled = false, SequenceNumber smallest_seqno = 0,
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
SequenceNumber largest_seqno = 0,
|
|
|
|
uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) {
|
2014-11-11 23:28:18 +01:00
|
|
|
assert(level < vstorage_.num_levels());
|
2019-10-15 00:19:31 +02:00
|
|
|
FileMetaData* f = new FileMetaData(
|
|
|
|
file_number, path_id, file_size, GetInternalKey(smallest, smallest_seq),
|
|
|
|
GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno,
|
2021-12-03 23:42:05 +01:00
|
|
|
/* marked_for_compact */ false, Temperature::kUnknown,
|
|
|
|
oldest_blob_file_number, kUnknownOldestAncesterTime,
|
|
|
|
kUnknownFileCreationTime, kUnknownFileChecksum,
|
|
|
|
kUnknownFileChecksumFuncName, kDisableUserTimestamp,
|
|
|
|
kDisableUserTimestamp);
|
2014-10-31 16:48:19 +01:00
|
|
|
f->compensated_file_size = file_size;
|
2014-11-11 23:28:18 +01:00
|
|
|
f->num_entries = num_entries;
|
|
|
|
f->num_deletions = num_deletions;
|
2014-11-13 22:41:43 +01:00
|
|
|
vstorage_.AddFile(level, f);
|
2014-11-11 23:28:18 +01:00
|
|
|
if (sampled) {
|
|
|
|
f->init_stats_from_file = true;
|
|
|
|
vstorage_.UpdateAccumulatedStats(f);
|
|
|
|
}
|
2014-10-31 16:48:19 +01:00
|
|
|
}
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
void AddBlob(uint64_t blob_file_number, uint64_t total_blob_count,
|
|
|
|
uint64_t total_blob_bytes, std::string checksum_method,
|
2020-06-12 18:52:14 +02:00
|
|
|
std::string checksum_value,
|
|
|
|
BlobFileMetaData::LinkedSsts linked_ssts,
|
|
|
|
uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) {
|
2020-04-23 22:41:32 +02:00
|
|
|
auto shared_meta = SharedBlobFileMetaData::Create(
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
blob_file_number, total_blob_count, total_blob_bytes,
|
|
|
|
std::move(checksum_method), std::move(checksum_value));
|
2020-06-12 18:52:14 +02:00
|
|
|
auto meta =
|
|
|
|
BlobFileMetaData::Create(std::move(shared_meta), std::move(linked_ssts),
|
|
|
|
garbage_blob_count, garbage_blob_bytes);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
vstorage_.AddBlobFile(std::move(meta));
|
|
|
|
}
|
|
|
|
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
void AddDummyFile(uint64_t table_file_number, uint64_t blob_file_number) {
|
|
|
|
constexpr int level = 0;
|
|
|
|
constexpr char smallest[] = "bar";
|
|
|
|
constexpr char largest[] = "foo";
|
|
|
|
constexpr uint64_t file_size = 100;
|
|
|
|
constexpr uint32_t path_id = 0;
|
|
|
|
constexpr SequenceNumber smallest_seq = 0;
|
|
|
|
constexpr SequenceNumber largest_seq = 0;
|
|
|
|
constexpr uint64_t num_entries = 0;
|
|
|
|
constexpr uint64_t num_deletions = 0;
|
|
|
|
constexpr bool sampled = false;
|
|
|
|
|
|
|
|
Add(level, table_file_number, smallest, largest, file_size, path_id,
|
|
|
|
smallest_seq, largest_seq, num_entries, num_deletions, sampled,
|
|
|
|
smallest_seq, largest_seq, blob_file_number);
|
|
|
|
}
|
|
|
|
|
|
|
|
void AddDummyFileToEdit(VersionEdit* edit, uint64_t table_file_number,
|
|
|
|
uint64_t blob_file_number) {
|
|
|
|
assert(edit);
|
|
|
|
|
|
|
|
constexpr int level = 0;
|
|
|
|
constexpr uint32_t path_id = 0;
|
|
|
|
constexpr uint64_t file_size = 100;
|
|
|
|
constexpr char smallest[] = "bar";
|
|
|
|
constexpr char largest[] = "foo";
|
|
|
|
constexpr SequenceNumber smallest_seqno = 100;
|
|
|
|
constexpr SequenceNumber largest_seqno = 300;
|
|
|
|
constexpr bool marked_for_compaction = false;
|
|
|
|
|
|
|
|
edit->AddFile(level, table_file_number, path_id, file_size,
|
|
|
|
GetInternalKey(smallest), GetInternalKey(largest),
|
|
|
|
smallest_seqno, largest_seqno, marked_for_compaction,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, blob_file_number,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
}
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
void UpdateVersionStorageInfo(VersionStorageInfo* vstorage) {
|
|
|
|
assert(vstorage);
|
|
|
|
|
|
|
|
vstorage->PrepareForVersionAppend(ioptions_, mutable_cf_options_);
|
|
|
|
vstorage->SetFinalized();
|
2014-10-31 16:48:19 +01:00
|
|
|
}
|
2022-02-04 17:18:18 +01:00
|
|
|
|
|
|
|
void UpdateVersionStorageInfo() { UpdateVersionStorageInfo(&vstorage_); }
|
2014-10-31 16:48:19 +01:00
|
|
|
};
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
void UnrefFilesInVersion(VersionStorageInfo* new_vstorage) {
|
|
|
|
for (int i = 0; i < new_vstorage->num_levels(); i++) {
|
|
|
|
for (auto* f : new_vstorage->LevelFiles(i)) {
|
|
|
|
if (--f->refs == 0) {
|
|
|
|
delete f;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(VersionBuilderTest, ApplyAndSaveTo) {
|
2014-10-31 16:48:19 +01:00
|
|
|
Add(0, 1U, "150", "200", 100U);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
2014-10-31 16:48:19 +01:00
|
|
|
Add(1, 66U, "150", "200", 100U);
|
|
|
|
Add(1, 88U, "201", "300", 100U);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
2014-10-31 16:48:19 +01:00
|
|
|
Add(2, 6U, "150", "179", 100U);
|
|
|
|
Add(2, 7U, "180", "220", 100U);
|
|
|
|
Add(2, 8U, "221", "300", 100U);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
2014-10-31 16:48:19 +01:00
|
|
|
Add(3, 26U, "150", "170", 100U);
|
|
|
|
Add(3, 27U, "171", "179", 100U);
|
|
|
|
Add(3, 28U, "191", "220", 100U);
|
|
|
|
Add(3, 29U, "221", "300", 100U);
|
2022-02-04 17:18:18 +01:00
|
|
|
|
2014-10-31 16:48:19 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
VersionEdit version_edit;
|
|
|
|
version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("350"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2014-10-31 16:48:19 +01:00
|
|
|
version_edit.DeleteFile(3, 27U);
|
|
|
|
|
|
|
|
EnvOptions env_options;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
2014-10-31 16:48:19 +01:00
|
|
|
|
2020-04-30 20:23:32 +02:00
|
|
|
VersionBuilder version_builder(env_options, &ioptions_, table_cache,
|
|
|
|
&vstorage_, version_set);
|
2014-10-31 16:48:19 +01:00
|
|
|
|
2014-11-11 23:28:18 +01:00
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
2016-10-08 02:21:45 +02:00
|
|
|
kCompactionStyleLevel, nullptr, false);
|
2020-09-28 21:11:17 +02:00
|
|
|
ASSERT_OK(version_builder.Apply(&version_edit));
|
|
|
|
ASSERT_OK(version_builder.SaveTo(&new_vstorage));
|
2014-10-31 16:48:19 +01:00
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
2014-10-31 16:48:19 +01:00
|
|
|
ASSERT_EQ(400U, new_vstorage.NumLevelBytes(2));
|
|
|
|
ASSERT_EQ(300U, new_vstorage.NumLevelBytes(3));
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) {
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
ioptions_.level_compaction_dynamic_level_bytes = true;
|
|
|
|
|
|
|
|
Add(0, 1U, "150", "200", 100U, 0, 200U, 200U, 0, 0, false, 200U, 200U);
|
|
|
|
Add(0, 88U, "201", "300", 100U, 0, 100U, 100U, 0, 0, false, 100U, 100U);
|
|
|
|
|
|
|
|
Add(4, 6U, "150", "179", 100U);
|
|
|
|
Add(4, 7U, "180", "220", 100U);
|
|
|
|
Add(4, 8U, "221", "300", 100U);
|
|
|
|
|
|
|
|
Add(5, 26U, "150", "170", 100U);
|
|
|
|
Add(5, 27U, "171", "179", 100U);
|
2022-02-04 17:18:18 +01:00
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
VersionEdit version_edit;
|
|
|
|
version_edit.AddFile(3, 666, 0, 100U, GetInternalKey("301"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("350"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
version_edit.DeleteFile(0, 1U);
|
|
|
|
version_edit.DeleteFile(0, 88U);
|
|
|
|
|
|
|
|
EnvOptions env_options;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
2020-04-30 20:23:32 +02:00
|
|
|
VersionBuilder version_builder(env_options, &ioptions_, table_cache,
|
|
|
|
&vstorage_, version_set);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
2016-10-08 02:21:45 +02:00
|
|
|
kCompactionStyleLevel, nullptr, false);
|
2020-09-28 21:11:17 +02:00
|
|
|
ASSERT_OK(version_builder.Apply(&version_edit));
|
|
|
|
ASSERT_OK(version_builder.SaveTo(&new_vstorage));
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
ASSERT_EQ(0U, new_vstorage.NumLevelBytes(0));
|
|
|
|
ASSERT_EQ(100U, new_vstorage.NumLevelBytes(3));
|
|
|
|
ASSERT_EQ(300U, new_vstorage.NumLevelBytes(4));
|
|
|
|
ASSERT_EQ(200U, new_vstorage.NumLevelBytes(5));
|
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) {
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
ioptions_.level_compaction_dynamic_level_bytes = true;
|
|
|
|
|
|
|
|
Add(0, 1U, "150", "200", 100U, 0, 200U, 200U, 0, 0, false, 200U, 200U);
|
|
|
|
Add(0, 88U, "201", "300", 100U, 0, 100U, 100U, 0, 0, false, 100U, 100U);
|
|
|
|
|
|
|
|
Add(4, 6U, "150", "179", 100U);
|
|
|
|
Add(4, 7U, "180", "220", 100U);
|
|
|
|
Add(4, 8U, "221", "300", 100U);
|
|
|
|
|
|
|
|
Add(5, 26U, "150", "170", 100U);
|
|
|
|
Add(5, 27U, "171", "179", 100U);
|
2022-02-04 17:18:18 +01:00
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
VersionEdit version_edit;
|
|
|
|
version_edit.AddFile(4, 666, 0, 100U, GetInternalKey("301"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("350"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
version_edit.DeleteFile(0, 1U);
|
|
|
|
version_edit.DeleteFile(0, 88U);
|
|
|
|
version_edit.DeleteFile(4, 6U);
|
|
|
|
version_edit.DeleteFile(4, 7U);
|
|
|
|
version_edit.DeleteFile(4, 8U);
|
|
|
|
|
|
|
|
EnvOptions env_options;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
2020-04-30 20:23:32 +02:00
|
|
|
VersionBuilder version_builder(env_options, &ioptions_, table_cache,
|
|
|
|
&vstorage_, version_set);
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
2016-10-08 02:21:45 +02:00
|
|
|
kCompactionStyleLevel, nullptr, false);
|
2020-09-28 21:11:17 +02:00
|
|
|
ASSERT_OK(version_builder.Apply(&version_edit));
|
|
|
|
ASSERT_OK(version_builder.SaveTo(&new_vstorage));
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
ASSERT_EQ(0U, new_vstorage.NumLevelBytes(0));
|
|
|
|
ASSERT_EQ(100U, new_vstorage.NumLevelBytes(4));
|
|
|
|
ASSERT_EQ(200U, new_vstorage.NumLevelBytes(5));
|
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
2015-01-07 19:29:21 +01:00
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) {
|
2015-01-07 19:29:21 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
VersionEdit version_edit;
|
|
|
|
version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("350"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("450"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("650"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("550"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("750"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
|
|
|
|
EnvOptions env_options;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
2015-01-07 19:29:21 +01:00
|
|
|
|
2020-04-30 20:23:32 +02:00
|
|
|
VersionBuilder version_builder(env_options, &ioptions_, table_cache,
|
|
|
|
&vstorage_, version_set);
|
2015-01-07 19:29:21 +01:00
|
|
|
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
2016-10-08 02:21:45 +02:00
|
|
|
kCompactionStyleLevel, nullptr, false);
|
2020-09-28 21:11:17 +02:00
|
|
|
ASSERT_OK(version_builder.Apply(&version_edit));
|
|
|
|
ASSERT_OK(version_builder.SaveTo(&new_vstorage));
|
2015-01-07 19:29:21 +01:00
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
2015-01-07 19:29:21 +01:00
|
|
|
ASSERT_EQ(500U, new_vstorage.NumLevelBytes(2));
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
2015-01-07 19:29:21 +01:00
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) {
|
2015-01-07 19:29:21 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
EnvOptions env_options;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder version_builder(env_options, &ioptions_, table_cache,
|
|
|
|
&vstorage_, version_set);
|
|
|
|
|
2015-01-07 19:29:21 +01:00
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
2016-10-08 02:21:45 +02:00
|
|
|
kCompactionStyleLevel, nullptr, false);
|
2015-01-07 19:29:21 +01:00
|
|
|
|
|
|
|
VersionEdit version_edit;
|
|
|
|
version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("350"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("450"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("650"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("550"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("750"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2020-09-28 21:11:17 +02:00
|
|
|
ASSERT_OK(version_builder.Apply(&version_edit));
|
2015-01-07 19:29:21 +01:00
|
|
|
|
|
|
|
VersionEdit version_edit2;
|
|
|
|
version_edit.AddFile(2, 808, 0, 100U, GetInternalKey("901"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("950"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
version_edit2.DeleteFile(2, 616);
|
|
|
|
version_edit2.DeleteFile(2, 636);
|
|
|
|
version_edit.AddFile(2, 806, 0, 100U, GetInternalKey("801"),
|
2019-10-15 00:19:31 +02:00
|
|
|
GetInternalKey("850"), 200, 200, false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2015-01-07 19:29:21 +01:00
|
|
|
|
2020-09-28 21:11:17 +02:00
|
|
|
ASSERT_OK(version_builder.Apply(&version_edit2));
|
|
|
|
ASSERT_OK(version_builder.SaveTo(&new_vstorage));
|
2015-01-07 19:29:21 +01:00
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
2015-01-07 19:29:21 +01:00
|
|
|
ASSERT_EQ(300U, new_vstorage.NumLevelBytes(2));
|
|
|
|
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
2014-10-31 16:48:19 +01:00
|
|
|
}
|
|
|
|
|
2020-06-03 20:21:16 +02:00
|
|
|
TEST_F(VersionBuilderTest, ApplyFileDeletionIncorrectLevel) {
|
|
|
|
constexpr int level = 1;
|
|
|
|
constexpr uint64_t file_number = 2345;
|
|
|
|
constexpr char smallest[] = "bar";
|
|
|
|
constexpr char largest[] = "foo";
|
2022-02-04 17:18:18 +01:00
|
|
|
constexpr uint64_t file_size = 100;
|
2020-06-03 20:21:16 +02:00
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
Add(level, file_number, smallest, largest, file_size);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
2020-06-03 20:21:16 +02:00
|
|
|
|
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
constexpr int incorrect_level = 3;
|
|
|
|
|
|
|
|
edit.DeleteFile(incorrect_level, file_number);
|
|
|
|
|
|
|
|
const Status s = builder.Apply(&edit);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(std::strstr(s.getState(),
|
|
|
|
"Cannot delete table file #2345 from level 3 since "
|
|
|
|
"it is on level 1"));
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyFileDeletionNotInLSMTree) {
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
2020-06-03 20:21:16 +02:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
constexpr int level = 3;
|
|
|
|
constexpr uint64_t file_number = 1234;
|
|
|
|
|
|
|
|
edit.DeleteFile(level, file_number);
|
|
|
|
|
|
|
|
const Status s = builder.Apply(&edit);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(std::strstr(s.getState(),
|
|
|
|
"Cannot delete table file #1234 from level 3 since "
|
|
|
|
"it is not in the LSM tree"));
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) {
|
|
|
|
constexpr int level = 1;
|
|
|
|
constexpr uint64_t file_number = 2345;
|
|
|
|
constexpr char smallest[] = "bar";
|
|
|
|
constexpr char largest[] = "foo";
|
2020-06-06 03:14:10 +02:00
|
|
|
constexpr uint64_t file_size = 10000;
|
|
|
|
constexpr uint32_t path_id = 0;
|
|
|
|
constexpr SequenceNumber smallest_seq = 100;
|
|
|
|
constexpr SequenceNumber largest_seq = 500;
|
|
|
|
constexpr uint64_t num_entries = 0;
|
|
|
|
constexpr uint64_t num_deletions = 0;
|
|
|
|
constexpr bool sampled = false;
|
|
|
|
constexpr SequenceNumber smallest_seqno = 1;
|
|
|
|
constexpr SequenceNumber largest_seqno = 1000;
|
2020-06-03 20:21:16 +02:00
|
|
|
|
2020-06-06 03:14:10 +02:00
|
|
|
Add(level, file_number, smallest, largest, file_size, path_id, smallest_seq,
|
|
|
|
largest_seq, num_entries, num_deletions, sampled, smallest_seqno,
|
|
|
|
largest_seqno);
|
2020-06-03 20:21:16 +02:00
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
2020-06-03 20:21:16 +02:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
VersionEdit deletion;
|
|
|
|
|
|
|
|
deletion.DeleteFile(level, file_number);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&deletion));
|
|
|
|
|
|
|
|
VersionEdit addition;
|
|
|
|
|
|
|
|
constexpr bool marked_for_compaction = false;
|
|
|
|
|
|
|
|
addition.AddFile(level, file_number, path_id, file_size,
|
2020-06-06 03:14:10 +02:00
|
|
|
GetInternalKey(smallest, smallest_seq),
|
|
|
|
GetInternalKey(largest, largest_seq), smallest_seqno,
|
2021-12-03 23:42:05 +01:00
|
|
|
largest_seqno, marked_for_compaction, Temperature::kUnknown,
|
|
|
|
kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
|
|
|
|
kUnknownFileCreationTime, kUnknownFileChecksum,
|
|
|
|
kUnknownFileChecksumFuncName, kDisableUserTimestamp,
|
|
|
|
kDisableUserTimestamp);
|
2020-06-03 20:21:16 +02:00
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&addition));
|
|
|
|
|
|
|
|
constexpr bool force_consistency_checks = false;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.SaveTo(&new_vstorage));
|
2022-02-04 17:18:18 +01:00
|
|
|
|
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
2020-06-03 20:21:16 +02:00
|
|
|
ASSERT_EQ(new_vstorage.GetFileLocation(file_number).GetLevel(), level);
|
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyInBase) {
|
|
|
|
constexpr int level = 1;
|
|
|
|
constexpr uint64_t file_number = 2345;
|
|
|
|
constexpr char smallest[] = "bar";
|
|
|
|
constexpr char largest[] = "foo";
|
2022-02-04 17:18:18 +01:00
|
|
|
constexpr uint64_t file_size = 10000;
|
|
|
|
|
|
|
|
Add(level, file_number, smallest, largest, file_size);
|
2020-06-03 20:21:16 +02:00
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
2020-06-03 20:21:16 +02:00
|
|
|
|
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
constexpr int new_level = 2;
|
|
|
|
constexpr uint32_t path_id = 0;
|
|
|
|
constexpr SequenceNumber smallest_seqno = 100;
|
|
|
|
constexpr SequenceNumber largest_seqno = 1000;
|
|
|
|
constexpr bool marked_for_compaction = false;
|
|
|
|
|
|
|
|
edit.AddFile(new_level, file_number, path_id, file_size,
|
|
|
|
GetInternalKey(smallest), GetInternalKey(largest),
|
|
|
|
smallest_seqno, largest_seqno, marked_for_compaction,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2020-06-03 20:21:16 +02:00
|
|
|
|
|
|
|
const Status s = builder.Apply(&edit);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(std::strstr(s.getState(),
|
|
|
|
"Cannot add table file #2345 to level 2 since it is "
|
|
|
|
"already in the LSM tree on level 1"));
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) {
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
2020-06-03 20:21:16 +02:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
constexpr int level = 3;
|
|
|
|
constexpr uint64_t file_number = 2345;
|
|
|
|
constexpr uint32_t path_id = 0;
|
|
|
|
constexpr uint64_t file_size = 10000;
|
|
|
|
constexpr char smallest[] = "bar";
|
|
|
|
constexpr char largest[] = "foo";
|
|
|
|
constexpr SequenceNumber smallest_seqno = 100;
|
|
|
|
constexpr SequenceNumber largest_seqno = 1000;
|
|
|
|
constexpr bool marked_for_compaction = false;
|
|
|
|
|
|
|
|
edit.AddFile(level, file_number, path_id, file_size, GetInternalKey(smallest),
|
|
|
|
GetInternalKey(largest), smallest_seqno, largest_seqno,
|
2021-12-03 23:42:05 +01:00
|
|
|
marked_for_compaction, Temperature::kUnknown,
|
|
|
|
kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
|
|
|
|
kUnknownFileCreationTime, kUnknownFileChecksum,
|
|
|
|
kUnknownFileChecksumFuncName, kDisableUserTimestamp,
|
|
|
|
kDisableUserTimestamp);
|
2020-06-03 20:21:16 +02:00
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&edit));
|
|
|
|
|
|
|
|
VersionEdit other_edit;
|
|
|
|
|
|
|
|
constexpr int new_level = 2;
|
|
|
|
|
|
|
|
other_edit.AddFile(new_level, file_number, path_id, file_size,
|
|
|
|
GetInternalKey(smallest), GetInternalKey(largest),
|
|
|
|
smallest_seqno, largest_seqno, marked_for_compaction,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2020-06-03 20:21:16 +02:00
|
|
|
|
|
|
|
const Status s = builder.Apply(&other_edit);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(std::strstr(s.getState(),
|
|
|
|
"Cannot add table file #2345 to level 2 since it is "
|
|
|
|
"already in the LSM tree on level 3"));
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) {
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
2020-06-03 20:21:16 +02:00
|
|
|
constexpr int level = 1;
|
|
|
|
constexpr uint64_t file_number = 2345;
|
|
|
|
constexpr uint32_t path_id = 0;
|
|
|
|
constexpr uint64_t file_size = 10000;
|
|
|
|
constexpr char smallest[] = "bar";
|
|
|
|
constexpr char largest[] = "foo";
|
|
|
|
constexpr SequenceNumber smallest_seqno = 100;
|
|
|
|
constexpr SequenceNumber largest_seqno = 1000;
|
|
|
|
constexpr bool marked_for_compaction = false;
|
|
|
|
|
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
VersionEdit addition;
|
|
|
|
|
|
|
|
addition.AddFile(level, file_number, path_id, file_size,
|
|
|
|
GetInternalKey(smallest), GetInternalKey(largest),
|
|
|
|
smallest_seqno, largest_seqno, marked_for_compaction,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2020-06-03 20:21:16 +02:00
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&addition));
|
|
|
|
|
|
|
|
VersionEdit deletion;
|
|
|
|
|
|
|
|
deletion.DeleteFile(level, file_number);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&deletion));
|
|
|
|
|
|
|
|
constexpr bool force_consistency_checks = false;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.SaveTo(&new_vstorage));
|
2022-02-04 17:18:18 +01:00
|
|
|
|
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
2020-06-03 20:21:16 +02:00
|
|
|
ASSERT_FALSE(new_vstorage.GetFileLocation(file_number).IsValid());
|
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
TEST_F(VersionBuilderTest, ApplyBlobFileAddition) {
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
constexpr uint64_t blob_file_number = 1234;
|
|
|
|
constexpr uint64_t total_blob_count = 5678;
|
|
|
|
constexpr uint64_t total_blob_bytes = 999999;
|
|
|
|
constexpr char checksum_method[] = "SHA1";
|
2021-06-22 18:48:50 +02:00
|
|
|
constexpr char checksum_value[] =
|
|
|
|
"\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52"
|
|
|
|
"\x5c\xbd";
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
|
|
|
|
checksum_method, checksum_value);
|
|
|
|
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
// Add dummy table file to ensure the blob file is referenced.
|
|
|
|
constexpr uint64_t table_file_number = 1;
|
|
|
|
AddDummyFileToEdit(&edit, table_file_number, blob_file_number);
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
ASSERT_OK(builder.Apply(&edit));
|
|
|
|
|
|
|
|
constexpr bool force_consistency_checks = false;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.SaveTo(&new_vstorage));
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
const auto& new_blob_files = new_vstorage.GetBlobFiles();
|
|
|
|
ASSERT_EQ(new_blob_files.size(), 1);
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
const auto new_meta = new_vstorage.GetBlobFileMetaData(blob_file_number);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
ASSERT_NE(new_meta, nullptr);
|
|
|
|
ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number);
|
|
|
|
ASSERT_EQ(new_meta->GetTotalBlobCount(), total_blob_count);
|
|
|
|
ASSERT_EQ(new_meta->GetTotalBlobBytes(), total_blob_bytes);
|
|
|
|
ASSERT_EQ(new_meta->GetChecksumMethod(), checksum_method);
|
|
|
|
ASSERT_EQ(new_meta->GetChecksumValue(), checksum_value);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
ASSERT_EQ(new_meta->GetLinkedSsts(),
|
|
|
|
BlobFileMetaData::LinkedSsts{table_file_number});
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
ASSERT_EQ(new_meta->GetGarbageBlobCount(), 0);
|
|
|
|
ASSERT_EQ(new_meta->GetGarbageBlobBytes(), 0);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyBlobFileAdditionAlreadyInBase) {
|
|
|
|
// Attempt to add a blob file that is already present in the base version.
|
|
|
|
|
|
|
|
constexpr uint64_t blob_file_number = 1234;
|
|
|
|
constexpr uint64_t total_blob_count = 5678;
|
|
|
|
constexpr uint64_t total_blob_bytes = 999999;
|
|
|
|
constexpr char checksum_method[] = "SHA1";
|
2021-06-22 18:48:50 +02:00
|
|
|
constexpr char checksum_value[] =
|
|
|
|
"\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52"
|
|
|
|
"\x5c\xbd";
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
constexpr uint64_t garbage_blob_count = 123;
|
|
|
|
constexpr uint64_t garbage_blob_bytes = 456789;
|
|
|
|
|
|
|
|
AddBlob(blob_file_number, total_blob_count, total_blob_bytes, checksum_method,
|
2020-06-12 18:52:14 +02:00
|
|
|
checksum_value, BlobFileMetaData::LinkedSsts(), garbage_blob_count,
|
|
|
|
garbage_blob_bytes);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
|
|
|
|
checksum_method, checksum_value);
|
|
|
|
|
|
|
|
const Status s = builder.Apply(&edit);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(std::strstr(s.getState(), "Blob file #1234 already added"));
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyBlobFileAdditionAlreadyApplied) {
|
|
|
|
// Attempt to add the same blob file twice using version edits.
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
constexpr uint64_t blob_file_number = 1234;
|
|
|
|
constexpr uint64_t total_blob_count = 5678;
|
|
|
|
constexpr uint64_t total_blob_bytes = 999999;
|
|
|
|
constexpr char checksum_method[] = "SHA1";
|
2021-06-22 18:48:50 +02:00
|
|
|
constexpr char checksum_value[] =
|
|
|
|
"\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52"
|
|
|
|
"\x5c\xbd";
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
|
|
|
|
checksum_method, checksum_value);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&edit));
|
|
|
|
|
|
|
|
const Status s = builder.Apply(&edit);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(std::strstr(s.getState(), "Blob file #1234 already added"));
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileInBase) {
|
|
|
|
// Increase the amount of garbage for a blob file present in the base version.
|
|
|
|
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
constexpr uint64_t table_file_number = 1;
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
constexpr uint64_t blob_file_number = 1234;
|
|
|
|
constexpr uint64_t total_blob_count = 5678;
|
|
|
|
constexpr uint64_t total_blob_bytes = 999999;
|
|
|
|
constexpr char checksum_method[] = "SHA1";
|
2021-06-22 18:48:50 +02:00
|
|
|
constexpr char checksum_value[] =
|
|
|
|
"\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52"
|
|
|
|
"\x5c\xbd";
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
constexpr uint64_t garbage_blob_count = 123;
|
|
|
|
constexpr uint64_t garbage_blob_bytes = 456789;
|
|
|
|
|
|
|
|
AddBlob(blob_file_number, total_blob_count, total_blob_bytes, checksum_method,
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
checksum_value, BlobFileMetaData::LinkedSsts{table_file_number},
|
|
|
|
garbage_blob_count, garbage_blob_bytes);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
const auto meta = vstorage_.GetBlobFileMetaData(blob_file_number);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
ASSERT_NE(meta, nullptr);
|
|
|
|
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
// Add dummy table file to ensure the blob file is referenced.
|
|
|
|
AddDummyFile(table_file_number, blob_file_number);
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
constexpr uint64_t new_garbage_blob_count = 456;
|
|
|
|
constexpr uint64_t new_garbage_blob_bytes = 111111;
|
|
|
|
|
|
|
|
edit.AddBlobFileGarbage(blob_file_number, new_garbage_blob_count,
|
|
|
|
new_garbage_blob_bytes);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&edit));
|
|
|
|
|
|
|
|
constexpr bool force_consistency_checks = false;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.SaveTo(&new_vstorage));
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
const auto& new_blob_files = new_vstorage.GetBlobFiles();
|
|
|
|
ASSERT_EQ(new_blob_files.size(), 1);
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
const auto new_meta = new_vstorage.GetBlobFileMetaData(blob_file_number);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
ASSERT_NE(new_meta, nullptr);
|
|
|
|
ASSERT_EQ(new_meta->GetSharedMeta(), meta->GetSharedMeta());
|
|
|
|
ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number);
|
|
|
|
ASSERT_EQ(new_meta->GetTotalBlobCount(), total_blob_count);
|
|
|
|
ASSERT_EQ(new_meta->GetTotalBlobBytes(), total_blob_bytes);
|
|
|
|
ASSERT_EQ(new_meta->GetChecksumMethod(), checksum_method);
|
|
|
|
ASSERT_EQ(new_meta->GetChecksumValue(), checksum_value);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
ASSERT_EQ(new_meta->GetLinkedSsts(),
|
|
|
|
BlobFileMetaData::LinkedSsts{table_file_number});
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
ASSERT_EQ(new_meta->GetGarbageBlobCount(),
|
|
|
|
garbage_blob_count + new_garbage_blob_count);
|
|
|
|
ASSERT_EQ(new_meta->GetGarbageBlobBytes(),
|
|
|
|
garbage_blob_bytes + new_garbage_blob_bytes);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileAdditionApplied) {
|
|
|
|
// Increase the amount of garbage for a blob file added using a version edit.
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
VersionEdit addition;
|
|
|
|
|
|
|
|
constexpr uint64_t blob_file_number = 1234;
|
|
|
|
constexpr uint64_t total_blob_count = 5678;
|
|
|
|
constexpr uint64_t total_blob_bytes = 999999;
|
|
|
|
constexpr char checksum_method[] = "SHA1";
|
2021-06-22 18:48:50 +02:00
|
|
|
constexpr char checksum_value[] =
|
|
|
|
"\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52"
|
|
|
|
"\x5c\xbd";
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
addition.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
|
|
|
|
checksum_method, checksum_value);
|
|
|
|
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
// Add dummy table file to ensure the blob file is referenced.
|
|
|
|
constexpr uint64_t table_file_number = 1;
|
|
|
|
AddDummyFileToEdit(&addition, table_file_number, blob_file_number);
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
ASSERT_OK(builder.Apply(&addition));
|
|
|
|
|
|
|
|
constexpr uint64_t garbage_blob_count = 123;
|
|
|
|
constexpr uint64_t garbage_blob_bytes = 456789;
|
|
|
|
|
|
|
|
VersionEdit garbage;
|
|
|
|
|
|
|
|
garbage.AddBlobFileGarbage(blob_file_number, garbage_blob_count,
|
|
|
|
garbage_blob_bytes);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&garbage));
|
|
|
|
|
|
|
|
constexpr bool force_consistency_checks = false;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.SaveTo(&new_vstorage));
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
const auto& new_blob_files = new_vstorage.GetBlobFiles();
|
|
|
|
ASSERT_EQ(new_blob_files.size(), 1);
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
const auto new_meta = new_vstorage.GetBlobFileMetaData(blob_file_number);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
ASSERT_NE(new_meta, nullptr);
|
|
|
|
ASSERT_EQ(new_meta->GetBlobFileNumber(), blob_file_number);
|
|
|
|
ASSERT_EQ(new_meta->GetTotalBlobCount(), total_blob_count);
|
|
|
|
ASSERT_EQ(new_meta->GetTotalBlobBytes(), total_blob_bytes);
|
|
|
|
ASSERT_EQ(new_meta->GetChecksumMethod(), checksum_method);
|
|
|
|
ASSERT_EQ(new_meta->GetChecksumValue(), checksum_value);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
ASSERT_EQ(new_meta->GetLinkedSsts(),
|
|
|
|
BlobFileMetaData::LinkedSsts{table_file_number});
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
ASSERT_EQ(new_meta->GetGarbageBlobCount(), garbage_blob_count);
|
|
|
|
ASSERT_EQ(new_meta->GetGarbageBlobBytes(), garbage_blob_bytes);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileNotFound) {
|
|
|
|
// Attempt to increase the amount of garbage for a blob file that is
|
|
|
|
// neither in the base version, nor was it added using a version edit.
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
constexpr uint64_t blob_file_number = 1234;
|
|
|
|
constexpr uint64_t garbage_blob_count = 5678;
|
|
|
|
constexpr uint64_t garbage_blob_bytes = 999999;
|
|
|
|
|
|
|
|
edit.AddBlobFileGarbage(blob_file_number, garbage_blob_count,
|
|
|
|
garbage_blob_bytes);
|
|
|
|
|
|
|
|
const Status s = builder.Apply(&edit);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(std::strstr(s.getState(), "Blob file #1234 not found"));
|
|
|
|
}
|
|
|
|
|
2021-11-01 20:31:13 +01:00
|
|
|
TEST_F(VersionBuilderTest, BlobFileGarbageOverflow) {
|
|
|
|
// Test that VersionEdits that would result in the count/total size of garbage
|
|
|
|
// exceeding the count/total size of all blobs are rejected.
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
2021-11-01 20:31:13 +01:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
VersionEdit addition;
|
|
|
|
|
|
|
|
constexpr uint64_t blob_file_number = 1234;
|
|
|
|
constexpr uint64_t total_blob_count = 5678;
|
|
|
|
constexpr uint64_t total_blob_bytes = 999999;
|
|
|
|
constexpr char checksum_method[] = "SHA1";
|
|
|
|
constexpr char checksum_value[] =
|
|
|
|
"\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52"
|
|
|
|
"\x5c\xbd";
|
|
|
|
|
|
|
|
addition.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
|
|
|
|
checksum_method, checksum_value);
|
|
|
|
|
|
|
|
// Add dummy table file to ensure the blob file is referenced.
|
|
|
|
constexpr uint64_t table_file_number = 1;
|
|
|
|
AddDummyFileToEdit(&addition, table_file_number, blob_file_number);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&addition));
|
|
|
|
|
|
|
|
{
|
|
|
|
// Garbage blob count overflow
|
|
|
|
constexpr uint64_t garbage_blob_count = 5679;
|
|
|
|
constexpr uint64_t garbage_blob_bytes = 999999;
|
|
|
|
|
|
|
|
VersionEdit garbage;
|
|
|
|
|
|
|
|
garbage.AddBlobFileGarbage(blob_file_number, garbage_blob_count,
|
|
|
|
garbage_blob_bytes);
|
|
|
|
|
|
|
|
const Status s = builder.Apply(&garbage);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(
|
|
|
|
std::strstr(s.getState(), "Garbage overflow for blob file #1234"));
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
// Garbage blob bytes overflow
|
|
|
|
constexpr uint64_t garbage_blob_count = 5678;
|
|
|
|
constexpr uint64_t garbage_blob_bytes = 1000000;
|
|
|
|
|
|
|
|
VersionEdit garbage;
|
|
|
|
|
|
|
|
garbage.AddBlobFileGarbage(blob_file_number, garbage_blob_count,
|
|
|
|
garbage_blob_bytes);
|
|
|
|
|
|
|
|
const Status s = builder.Apply(&garbage);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(
|
|
|
|
std::strstr(s.getState(), "Garbage overflow for blob file #1234"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
TEST_F(VersionBuilderTest, SaveBlobFilesTo) {
|
|
|
|
// Add three blob files to base version.
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
for (uint64_t i = 1; i <= 3; ++i) {
|
|
|
|
const uint64_t table_file_number = 2 * i;
|
|
|
|
const uint64_t blob_file_number = 2 * i + 1;
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
const uint64_t total_blob_count = i * 1000;
|
|
|
|
const uint64_t total_blob_bytes = i * 1000000;
|
|
|
|
const uint64_t garbage_blob_count = i * 100;
|
|
|
|
const uint64_t garbage_blob_bytes = i * 20000;
|
|
|
|
|
|
|
|
AddBlob(blob_file_number, total_blob_count, total_blob_bytes,
|
|
|
|
/* checksum_method */ std::string(),
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
/* checksum_value */ std::string(),
|
|
|
|
BlobFileMetaData::LinkedSsts{table_file_number}, garbage_blob_count,
|
|
|
|
garbage_blob_bytes);
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Add dummy table files to ensure the blob files are referenced.
|
|
|
|
// Note: files are added to L0, so they have to be added in reverse order
|
|
|
|
// (newest first).
|
|
|
|
for (uint64_t i = 3; i >= 1; --i) {
|
|
|
|
const uint64_t table_file_number = 2 * i;
|
|
|
|
const uint64_t blob_file_number = 2 * i + 1;
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
|
|
|
|
AddDummyFile(table_file_number, blob_file_number);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
}
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
// Add some garbage to the second and third blob files. The second blob file
|
|
|
|
// remains valid since it does not consist entirely of garbage yet. The third
|
|
|
|
// blob file is all garbage after the edit and will not be part of the new
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
// version. The corresponding dummy table file is also removed for
|
|
|
|
// consistency.
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
edit.AddBlobFileGarbage(/* blob_file_number */ 5,
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
/* garbage_blob_count */ 200,
|
|
|
|
/* garbage_blob_bytes */ 100000);
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
edit.AddBlobFileGarbage(/* blob_file_number */ 7,
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
/* garbage_blob_count */ 2700,
|
|
|
|
/* garbage_blob_bytes */ 2940000);
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
edit.DeleteFile(/* level */ 0, /* file_number */ 6);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
// Add a fourth blob file.
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
edit.AddBlobFile(/* blob_file_number */ 9, /* total_blob_count */ 4000,
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
/* total_blob_bytes */ 4000000,
|
|
|
|
/* checksum_method */ std::string(),
|
|
|
|
/* checksum_value */ std::string());
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&edit));
|
|
|
|
|
|
|
|
constexpr bool force_consistency_checks = false;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.SaveTo(&new_vstorage));
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
const auto& new_blob_files = new_vstorage.GetBlobFiles();
|
|
|
|
ASSERT_EQ(new_blob_files.size(), 3);
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
const auto meta3 = new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 3);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
ASSERT_NE(meta3, nullptr);
|
|
|
|
ASSERT_EQ(meta3->GetBlobFileNumber(), 3);
|
|
|
|
ASSERT_EQ(meta3->GetTotalBlobCount(), 1000);
|
|
|
|
ASSERT_EQ(meta3->GetTotalBlobBytes(), 1000000);
|
|
|
|
ASSERT_EQ(meta3->GetGarbageBlobCount(), 100);
|
|
|
|
ASSERT_EQ(meta3->GetGarbageBlobBytes(), 20000);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
const auto meta5 = new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 5);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
ASSERT_NE(meta5, nullptr);
|
|
|
|
ASSERT_EQ(meta5->GetBlobFileNumber(), 5);
|
|
|
|
ASSERT_EQ(meta5->GetTotalBlobCount(), 2000);
|
|
|
|
ASSERT_EQ(meta5->GetTotalBlobBytes(), 2000000);
|
|
|
|
ASSERT_EQ(meta5->GetGarbageBlobCount(), 400);
|
|
|
|
ASSERT_EQ(meta5->GetGarbageBlobBytes(), 140000);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
const auto meta9 = new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 9);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
ASSERT_NE(meta9, nullptr);
|
|
|
|
ASSERT_EQ(meta9->GetBlobFileNumber(), 9);
|
|
|
|
ASSERT_EQ(meta9->GetTotalBlobCount(), 4000);
|
|
|
|
ASSERT_EQ(meta9->GetTotalBlobBytes(), 4000000);
|
|
|
|
ASSERT_EQ(meta9->GetGarbageBlobCount(), 0);
|
|
|
|
ASSERT_EQ(meta9->GetGarbageBlobBytes(), 0);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
|
|
|
|
// Delete the first table file, which makes the first blob file obsolete
|
|
|
|
// since it's at the head and unreferenced.
|
|
|
|
VersionBuilder second_builder(env_options, &ioptions_, table_cache,
|
|
|
|
&new_vstorage, version_set);
|
|
|
|
|
|
|
|
VersionEdit second_edit;
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
second_edit.DeleteFile(/* level */ 0, /* file_number */ 2);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
|
|
|
|
ASSERT_OK(second_builder.Apply(&second_edit));
|
|
|
|
|
|
|
|
VersionStorageInfo newer_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &new_vstorage,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(second_builder.SaveTo(&newer_vstorage));
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&newer_vstorage);
|
|
|
|
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
const auto& newer_blob_files = newer_vstorage.GetBlobFiles();
|
|
|
|
ASSERT_EQ(newer_blob_files.size(), 2);
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
const auto newer_meta3 =
|
|
|
|
newer_vstorage.GetBlobFileMetaData(/* blob_file_number */ 3);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
ASSERT_EQ(newer_meta3, nullptr);
|
Clean up blob files based on the linked SST set (#7001)
Summary:
The earlier `VersionBuilder` code only cleaned up blob files that were
marked as entirely consisting of garbage using `VersionEdits` with
`BlobFileGarbage`. This covers the cases when table files go through
regular compaction, where we iterate through the KVs and thus have an
opportunity to calculate the amount of garbage (that is, most cases).
However, it does not help when table files are simply dropped (e.g. deletion
compactions or the `DeleteFile` API). To deal with such cases, the patch
adds logic that cleans up all blob files at the head of the list until the first
one with linked SSTs is found. (As an example, let's assume we have blob files
with numbers 1..10, and the first one with any linked SSTs is number 8.
This means that SSTs in the `Version` only rely on blob files with numbers >= 8,
and thus 1..7 are no longer needed.)
The code change itself is pretty small; however, changing the logic like this
necessitated changes to some tests that have been added recently (namely
to the ones that use blob files in isolation, i.e. without any table files referring
to them). Some of these cases were fixed by bypassing `VersionBuilder` altogether
in order to keep the tests simple (which actually makes them more proper unit tests
as well), while the `VersionBuilder` unit tests were fixed by adding dummy table
files to the test cases as needed.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/7001
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D22119474
Pulled By: ltamasi
fbshipit-source-id: c6547141355667d4291d9661d6518eb741e7b54a
2020-07-01 00:30:01 +02:00
|
|
|
|
|
|
|
UnrefFilesInVersion(&newer_vstorage);
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
}
|
|
|
|
|
2020-09-09 19:23:52 +02:00
|
|
|
TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) {
|
|
|
|
// When multiple background jobs (flushes/compactions) are executing in
|
|
|
|
// parallel, it is possible for the VersionEdit adding blob file K to be
|
|
|
|
// applied *after* the VersionEdit adding blob file N (for N > K). This test
|
|
|
|
// case makes sure this is handled correctly.
|
|
|
|
|
|
|
|
// Add blob file #4 (referenced by table file #3) to base version.
|
|
|
|
constexpr uint64_t base_table_file_number = 3;
|
|
|
|
constexpr uint64_t base_blob_file_number = 4;
|
|
|
|
constexpr uint64_t base_total_blob_count = 100;
|
|
|
|
constexpr uint64_t base_total_blob_bytes = 1 << 20;
|
|
|
|
|
|
|
|
constexpr char checksum_method[] = "SHA1";
|
|
|
|
constexpr char checksum_value[] = "\xfa\xce\xb0\x0c";
|
|
|
|
constexpr uint64_t garbage_blob_count = 0;
|
|
|
|
constexpr uint64_t garbage_blob_bytes = 0;
|
|
|
|
|
|
|
|
AddDummyFile(base_table_file_number, base_blob_file_number);
|
|
|
|
AddBlob(base_blob_file_number, base_total_blob_count, base_total_blob_bytes,
|
|
|
|
checksum_method, checksum_value,
|
|
|
|
BlobFileMetaData::LinkedSsts{base_table_file_number},
|
|
|
|
garbage_blob_count, garbage_blob_bytes);
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
2020-09-09 19:23:52 +02:00
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
// Add blob file #2 (referenced by table file #1).
|
|
|
|
constexpr int level = 0;
|
|
|
|
constexpr uint64_t table_file_number = 1;
|
|
|
|
constexpr uint32_t path_id = 0;
|
|
|
|
constexpr uint64_t file_size = 1 << 12;
|
|
|
|
constexpr char smallest[] = "key1";
|
|
|
|
constexpr char largest[] = "key987";
|
|
|
|
constexpr SequenceNumber smallest_seqno = 0;
|
|
|
|
constexpr SequenceNumber largest_seqno = 0;
|
|
|
|
constexpr bool marked_for_compaction = false;
|
|
|
|
|
|
|
|
constexpr uint64_t blob_file_number = 2;
|
|
|
|
static_assert(blob_file_number < base_blob_file_number,
|
|
|
|
"Added blob file should have a smaller file number");
|
|
|
|
|
|
|
|
constexpr uint64_t total_blob_count = 234;
|
|
|
|
constexpr uint64_t total_blob_bytes = 1 << 22;
|
|
|
|
|
2021-12-03 23:42:05 +01:00
|
|
|
edit.AddFile(
|
|
|
|
level, table_file_number, path_id, file_size, GetInternalKey(smallest),
|
|
|
|
GetInternalKey(largest), smallest_seqno, largest_seqno,
|
|
|
|
marked_for_compaction, Temperature::kUnknown, blob_file_number,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime, checksum_value,
|
|
|
|
checksum_method, kDisableUserTimestamp, kDisableUserTimestamp);
|
2020-09-09 19:23:52 +02:00
|
|
|
edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
|
|
|
|
checksum_method, checksum_value);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&edit));
|
|
|
|
|
|
|
|
constexpr bool force_consistency_checks = true;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.SaveTo(&new_vstorage));
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
2020-09-09 19:23:52 +02:00
|
|
|
const auto& new_blob_files = new_vstorage.GetBlobFiles();
|
|
|
|
ASSERT_EQ(new_blob_files.size(), 2);
|
|
|
|
|
|
|
|
const auto base_meta =
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
new_vstorage.GetBlobFileMetaData(base_blob_file_number);
|
2020-09-09 19:23:52 +02:00
|
|
|
|
|
|
|
ASSERT_NE(base_meta, nullptr);
|
|
|
|
ASSERT_EQ(base_meta->GetBlobFileNumber(), base_blob_file_number);
|
|
|
|
ASSERT_EQ(base_meta->GetTotalBlobCount(), base_total_blob_count);
|
|
|
|
ASSERT_EQ(base_meta->GetTotalBlobBytes(), base_total_blob_bytes);
|
|
|
|
ASSERT_EQ(base_meta->GetGarbageBlobCount(), garbage_blob_count);
|
|
|
|
ASSERT_EQ(base_meta->GetGarbageBlobBytes(), garbage_blob_bytes);
|
|
|
|
ASSERT_EQ(base_meta->GetChecksumMethod(), checksum_method);
|
|
|
|
ASSERT_EQ(base_meta->GetChecksumValue(), checksum_value);
|
|
|
|
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
const auto added_meta = new_vstorage.GetBlobFileMetaData(blob_file_number);
|
2020-09-09 19:23:52 +02:00
|
|
|
|
|
|
|
ASSERT_NE(added_meta, nullptr);
|
|
|
|
ASSERT_EQ(added_meta->GetBlobFileNumber(), blob_file_number);
|
|
|
|
ASSERT_EQ(added_meta->GetTotalBlobCount(), total_blob_count);
|
|
|
|
ASSERT_EQ(added_meta->GetTotalBlobBytes(), total_blob_bytes);
|
|
|
|
ASSERT_EQ(added_meta->GetGarbageBlobCount(), garbage_blob_count);
|
|
|
|
ASSERT_EQ(added_meta->GetGarbageBlobBytes(), garbage_blob_bytes);
|
|
|
|
ASSERT_EQ(added_meta->GetChecksumMethod(), checksum_method);
|
|
|
|
ASSERT_EQ(added_meta->GetChecksumValue(), checksum_value);
|
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) {
|
|
|
|
// Initialize base version. The first table file points to a valid blob file
|
|
|
|
// in this version; the second one does not refer to any blob files.
|
|
|
|
|
|
|
|
Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150",
|
|
|
|
/* largest */ "200", /* file_size */ 100,
|
|
|
|
/* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100,
|
|
|
|
/* num_entries */ 0, /* num_deletions */ 0,
|
|
|
|
/* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100,
|
|
|
|
/* oldest_blob_file_number */ 16);
|
|
|
|
Add(/* level */ 1, /* file_number */ 23, /* smallest */ "201",
|
|
|
|
/* largest */ "300", /* file_size */ 100,
|
|
|
|
/* path_id */ 0, /* smallest_seq */ 200, /* largest_seq */ 200,
|
|
|
|
/* num_entries */ 0, /* num_deletions */ 0,
|
|
|
|
/* sampled */ false, /* smallest_seqno */ 200, /* largest_seqno */ 200,
|
|
|
|
kInvalidBlobFileNumber);
|
|
|
|
|
|
|
|
AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000,
|
|
|
|
/* total_blob_bytes */ 1000000,
|
|
|
|
/* checksum_method */ std::string(),
|
2020-06-12 18:52:14 +02:00
|
|
|
/* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1},
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
/* garbage_blob_count */ 500, /* garbage_blob_bytes */ 300000);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
// Add a new table file that points to the existing blob file, and add a
|
|
|
|
// new table file--blob file pair.
|
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
edit.AddFile(/* level */ 1, /* file_number */ 606, /* path_id */ 0,
|
|
|
|
/* file_size */ 100, /* smallest */ GetInternalKey("701"),
|
|
|
|
/* largest */ GetInternalKey("750"), /* smallest_seqno */ 200,
|
|
|
|
/* largest_seqno */ 200, /* marked_for_compaction */ false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown,
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
/* oldest_blob_file_number */ 16, kUnknownOldestAncesterTime,
|
|
|
|
kUnknownFileCreationTime, kUnknownFileChecksum,
|
2021-11-10 19:47:53 +01:00
|
|
|
kUnknownFileChecksumFuncName, kDisableUserTimestamp,
|
|
|
|
kDisableUserTimestamp);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
edit.AddFile(/* level */ 1, /* file_number */ 700, /* path_id */ 0,
|
|
|
|
/* file_size */ 100, /* smallest */ GetInternalKey("801"),
|
|
|
|
/* largest */ GetInternalKey("850"), /* smallest_seqno */ 200,
|
|
|
|
/* largest_seqno */ 200, /* marked_for_compaction */ false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown,
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
/* oldest_blob_file_number */ 1000, kUnknownOldestAncesterTime,
|
|
|
|
kUnknownFileCreationTime, kUnknownFileChecksum,
|
2021-11-10 19:47:53 +01:00
|
|
|
kUnknownFileChecksumFuncName, kDisableUserTimestamp,
|
|
|
|
kDisableUserTimestamp);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
edit.AddBlobFile(/* blob_file_number */ 1000, /* total_blob_count */ 2000,
|
|
|
|
/* total_blob_bytes */ 200000,
|
|
|
|
/* checksum_method */ std::string(),
|
|
|
|
/* checksum_value */ std::string());
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&edit));
|
|
|
|
|
|
|
|
// Save to a new version in order to trigger consistency checks.
|
|
|
|
constexpr bool force_consistency_checks = true;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.SaveTo(&new_vstorage));
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
2020-06-12 18:52:14 +02:00
|
|
|
TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesInconsistentLinks) {
|
|
|
|
// Initialize base version. Links between the table file and the blob file
|
|
|
|
// are inconsistent.
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150",
|
|
|
|
/* largest */ "200", /* file_size */ 100,
|
|
|
|
/* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100,
|
|
|
|
/* num_entries */ 0, /* num_deletions */ 0,
|
|
|
|
/* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100,
|
|
|
|
/* oldest_blob_file_number */ 256);
|
|
|
|
|
|
|
|
AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000,
|
|
|
|
/* total_blob_bytes */ 1000000,
|
|
|
|
/* checksum_method */ std::string(),
|
2020-06-12 18:52:14 +02:00
|
|
|
/* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1},
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
/* garbage_blob_count */ 500, /* garbage_blob_bytes */ 300000);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
// Save to a new version in order to trigger consistency checks.
|
|
|
|
constexpr bool force_consistency_checks = true;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
const Status s = builder.SaveTo(&new_vstorage);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
2020-06-12 18:52:14 +02:00
|
|
|
ASSERT_TRUE(std::strstr(
|
|
|
|
s.getState(),
|
|
|
|
"Links are inconsistent between table files and blob file #16"));
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbage) {
|
|
|
|
// Initialize base version. The table file points to a blob file that is
|
|
|
|
// all garbage.
|
|
|
|
|
|
|
|
Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150",
|
|
|
|
/* largest */ "200", /* file_size */ 100,
|
|
|
|
/* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100,
|
|
|
|
/* num_entries */ 0, /* num_deletions */ 0,
|
|
|
|
/* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100,
|
|
|
|
/* oldest_blob_file_number */ 16);
|
|
|
|
|
|
|
|
AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000,
|
|
|
|
/* total_blob_bytes */ 1000000,
|
|
|
|
/* checksum_method */ std::string(),
|
2020-06-12 18:52:14 +02:00
|
|
|
/* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1},
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
/* garbage_blob_count */ 1000, /* garbage_blob_bytes */ 1000000);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
2020-04-30 20:23:32 +02:00
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
Add blob files to VersionStorageInfo/VersionBuilder (#6597)
Summary:
The patch adds a couple of classes to represent metadata about
blob files: `SharedBlobFileMetaData` contains the information elements
that are immutable (once the blob file is closed), e.g. blob file number,
total number and size of blob files, checksum method/value, while
`BlobFileMetaData` contains attributes that can vary across versions like
the amount of garbage in the file. There is a single `SharedBlobFileMetaData`
for each blob file, which is jointly owned by the `BlobFileMetaData` objects
that point to it; `BlobFileMetaData` objects, in turn, are owned by `Version`s
and can also be shared if the (immutable _and_ mutable) state of the blob file
is the same in two versions.
In addition, the patch adds the blob file metadata to `VersionStorageInfo`, and extends
`VersionBuilder` so that it can apply blob file related `VersionEdit`s (i.e. those
containing `BlobFileAddition`s and/or `BlobFileGarbage`), and save blob file metadata
to a new `VersionStorageInfo`. Consistency checks are also extended to ensure
that table files point to blob files that are part of the `Version`, and that all blob files
that are part of any given `Version` have at least some _non_-garbage data in them.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6597
Test Plan: `make check`
Reviewed By: riversand963
Differential Revision: D20656803
Pulled By: ltamasi
fbshipit-source-id: f1f74d135045b3b42d0146f03ee576ef0a4bfd80
2020-03-27 02:48:55 +01:00
|
|
|
|
|
|
|
// Save to a new version in order to trigger consistency checks.
|
|
|
|
constexpr bool force_consistency_checks = true;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
const Status s = builder.SaveTo(&new_vstorage);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(
|
|
|
|
std::strstr(s.getState(), "Blob file #16 consists entirely of garbage"));
|
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
2020-06-12 18:52:14 +02:00
|
|
|
TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbageLinkedSsts) {
|
|
|
|
// Initialize base version, with a table file pointing to a blob file
|
|
|
|
// that has no garbage at this point.
|
|
|
|
|
|
|
|
Add(/* level */ 1, /* file_number */ 1, /* smallest */ "150",
|
|
|
|
/* largest */ "200", /* file_size */ 100,
|
|
|
|
/* path_id */ 0, /* smallest_seq */ 100, /* largest_seq */ 100,
|
|
|
|
/* num_entries */ 0, /* num_deletions */ 0,
|
|
|
|
/* sampled */ false, /* smallest_seqno */ 100, /* largest_seqno */ 100,
|
|
|
|
/* oldest_blob_file_number */ 16);
|
|
|
|
|
|
|
|
AddBlob(/* blob_file_number */ 16, /* total_blob_count */ 1000,
|
|
|
|
/* total_blob_bytes */ 1000000,
|
|
|
|
/* checksum_method */ std::string(),
|
|
|
|
/* checksum_value */ std::string(), BlobFileMetaData::LinkedSsts{1},
|
|
|
|
/* garbage_blob_count */ 0, /* garbage_blob_bytes */ 0);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
// Mark the entire blob file garbage but do not remove the linked SST.
|
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
edit.AddBlobFileGarbage(/* blob_file_number */ 16,
|
|
|
|
/* garbage_blob_count */ 1000,
|
|
|
|
/* garbage_blob_bytes */ 1000000);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&edit));
|
|
|
|
|
|
|
|
// Save to a new version in order to trigger consistency checks.
|
|
|
|
constexpr bool force_consistency_checks = true;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
const Status s = builder.SaveTo(&new_vstorage);
|
|
|
|
ASSERT_TRUE(s.IsCorruption());
|
|
|
|
ASSERT_TRUE(
|
|
|
|
std::strstr(s.getState(), "Blob file #16 consists entirely of garbage"));
|
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) {
|
|
|
|
// Initialize base version. Table files 1..10 are linked to blob files 1..5,
|
|
|
|
// while table files 11..20 are not linked to any blob files.
|
|
|
|
|
|
|
|
for (uint64_t i = 1; i <= 10; ++i) {
|
|
|
|
std::ostringstream oss;
|
|
|
|
oss << std::setw(2) << std::setfill('0') << i;
|
|
|
|
|
|
|
|
const std::string key = oss.str();
|
|
|
|
|
|
|
|
Add(/* level */ 1, /* file_number */ i, /* smallest */ key.c_str(),
|
|
|
|
/* largest */ key.c_str(), /* file_size */ 100,
|
|
|
|
/* path_id */ 0, /* smallest_seq */ i * 100, /* largest_seq */ i * 100,
|
|
|
|
/* num_entries */ 0, /* num_deletions */ 0,
|
|
|
|
/* sampled */ false, /* smallest_seqno */ i * 100,
|
|
|
|
/* largest_seqno */ i * 100,
|
|
|
|
/* oldest_blob_file_number */ ((i - 1) % 5) + 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (uint64_t i = 1; i <= 5; ++i) {
|
|
|
|
AddBlob(/* blob_file_number */ i, /* total_blob_count */ 2000,
|
|
|
|
/* total_blob_bytes */ 2000000,
|
|
|
|
/* checksum_method */ std::string(),
|
|
|
|
/* checksum_value */ std::string(),
|
|
|
|
BlobFileMetaData::LinkedSsts{i, i + 5},
|
|
|
|
/* garbage_blob_count */ 1000, /* garbage_blob_bytes */ 1000000);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (uint64_t i = 11; i <= 20; ++i) {
|
|
|
|
std::ostringstream oss;
|
|
|
|
oss << std::setw(2) << std::setfill('0') << i;
|
|
|
|
|
|
|
|
const std::string key = oss.str();
|
|
|
|
|
|
|
|
Add(/* level */ 1, /* file_number */ i, /* smallest */ key.c_str(),
|
|
|
|
/* largest */ key.c_str(), /* file_size */ 100,
|
|
|
|
/* path_id */ 0, /* smallest_seq */ i * 100, /* largest_seq */ i * 100,
|
|
|
|
/* num_entries */ 0, /* num_deletions */ 0,
|
|
|
|
/* sampled */ false, /* smallest_seqno */ i * 100,
|
|
|
|
/* largest_seqno */ i * 100, kInvalidBlobFileNumber);
|
|
|
|
}
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
{
|
|
|
|
const auto& blob_files = vstorage_.GetBlobFiles();
|
|
|
|
ASSERT_EQ(blob_files.size(), 5);
|
|
|
|
|
|
|
|
const std::vector<BlobFileMetaData::LinkedSsts> expected_linked_ssts{
|
|
|
|
{1, 6}, {2, 7}, {3, 8}, {4, 9}, {5, 10}};
|
|
|
|
|
|
|
|
for (size_t i = 0; i < 5; ++i) {
|
|
|
|
const auto meta =
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
vstorage_.GetBlobFileMetaData(/* blob_file_number */ i + 1);
|
2020-06-12 18:52:14 +02:00
|
|
|
ASSERT_NE(meta, nullptr);
|
|
|
|
ASSERT_EQ(meta->GetLinkedSsts(), expected_linked_ssts[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
VersionEdit edit;
|
|
|
|
|
|
|
|
// Add an SST that references a blob file.
|
|
|
|
edit.AddFile(
|
|
|
|
/* level */ 1, /* file_number */ 21, /* path_id */ 0,
|
|
|
|
/* file_size */ 100, /* smallest */ GetInternalKey("21", 2100),
|
|
|
|
/* largest */ GetInternalKey("21", 2100), /* smallest_seqno */ 2100,
|
|
|
|
/* largest_seqno */ 2100, /* marked_for_compaction */ false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown,
|
2020-06-12 18:52:14 +02:00
|
|
|
/* oldest_blob_file_number */ 1, kUnknownOldestAncesterTime,
|
|
|
|
kUnknownFileCreationTime, kUnknownFileChecksum,
|
2021-11-10 19:47:53 +01:00
|
|
|
kUnknownFileChecksumFuncName, kDisableUserTimestamp,
|
|
|
|
kDisableUserTimestamp);
|
2020-06-12 18:52:14 +02:00
|
|
|
|
|
|
|
// Add an SST that does not reference any blob files.
|
|
|
|
edit.AddFile(
|
|
|
|
/* level */ 1, /* file_number */ 22, /* path_id */ 0,
|
|
|
|
/* file_size */ 100, /* smallest */ GetInternalKey("22", 2200),
|
|
|
|
/* largest */ GetInternalKey("22", 2200), /* smallest_seqno */ 2200,
|
|
|
|
/* largest_seqno */ 2200, /* marked_for_compaction */ false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
|
2020-06-12 18:52:14 +02:00
|
|
|
kUnknownFileCreationTime, kUnknownFileChecksum,
|
2021-11-10 19:47:53 +01:00
|
|
|
kUnknownFileChecksumFuncName, kDisableUserTimestamp,
|
|
|
|
kDisableUserTimestamp);
|
2020-06-12 18:52:14 +02:00
|
|
|
|
|
|
|
// Delete a file that references a blob file.
|
|
|
|
edit.DeleteFile(/* level */ 1, /* file_number */ 6);
|
|
|
|
|
|
|
|
// Delete a file that does not reference any blob files.
|
|
|
|
edit.DeleteFile(/* level */ 1, /* file_number */ 16);
|
|
|
|
|
|
|
|
// Trivially move a file that references a blob file. Note that we save
|
|
|
|
// the original BlobFileMetaData object so we can check that no new object
|
|
|
|
// gets created.
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
auto meta3 = vstorage_.GetBlobFileMetaData(/* blob_file_number */ 3);
|
2020-06-12 18:52:14 +02:00
|
|
|
|
|
|
|
edit.DeleteFile(/* level */ 1, /* file_number */ 3);
|
|
|
|
edit.AddFile(/* level */ 2, /* file_number */ 3, /* path_id */ 0,
|
|
|
|
/* file_size */ 100, /* smallest */ GetInternalKey("03", 300),
|
|
|
|
/* largest */ GetInternalKey("03", 300),
|
|
|
|
/* smallest_seqno */ 300,
|
|
|
|
/* largest_seqno */ 300, /* marked_for_compaction */ false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown,
|
2020-06-12 18:52:14 +02:00
|
|
|
/* oldest_blob_file_number */ 3, kUnknownOldestAncesterTime,
|
|
|
|
kUnknownFileCreationTime, kUnknownFileChecksum,
|
2021-11-10 19:47:53 +01:00
|
|
|
kUnknownFileChecksumFuncName, kDisableUserTimestamp,
|
|
|
|
kDisableUserTimestamp);
|
2020-06-12 18:52:14 +02:00
|
|
|
|
|
|
|
// Trivially move a file that does not reference any blob files.
|
|
|
|
edit.DeleteFile(/* level */ 1, /* file_number */ 13);
|
|
|
|
edit.AddFile(/* level */ 2, /* file_number */ 13, /* path_id */ 0,
|
|
|
|
/* file_size */ 100, /* smallest */ GetInternalKey("13", 1300),
|
|
|
|
/* largest */ GetInternalKey("13", 1300),
|
|
|
|
/* smallest_seqno */ 1300,
|
|
|
|
/* largest_seqno */ 1300, /* marked_for_compaction */ false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown, kInvalidBlobFileNumber,
|
|
|
|
kUnknownOldestAncesterTime, kUnknownFileCreationTime,
|
|
|
|
kUnknownFileChecksum, kUnknownFileChecksumFuncName,
|
|
|
|
kDisableUserTimestamp, kDisableUserTimestamp);
|
2020-06-12 18:52:14 +02:00
|
|
|
|
|
|
|
// Add one more SST file that references a blob file, then promptly
|
|
|
|
// delete it in a second version edit before the new version gets saved.
|
|
|
|
// This file should not show up as linked to the blob file in the new version.
|
|
|
|
edit.AddFile(/* level */ 1, /* file_number */ 23, /* path_id */ 0,
|
|
|
|
/* file_size */ 100, /* smallest */ GetInternalKey("23", 2300),
|
|
|
|
/* largest */ GetInternalKey("23", 2300),
|
|
|
|
/* smallest_seqno */ 2300,
|
|
|
|
/* largest_seqno */ 2300, /* marked_for_compaction */ false,
|
2021-12-03 23:42:05 +01:00
|
|
|
Temperature::kUnknown,
|
2020-06-12 18:52:14 +02:00
|
|
|
/* oldest_blob_file_number */ 5, kUnknownOldestAncesterTime,
|
|
|
|
kUnknownFileCreationTime, kUnknownFileChecksum,
|
2021-11-10 19:47:53 +01:00
|
|
|
kUnknownFileChecksumFuncName, kDisableUserTimestamp,
|
|
|
|
kDisableUserTimestamp);
|
2020-06-12 18:52:14 +02:00
|
|
|
|
|
|
|
VersionEdit edit2;
|
|
|
|
|
|
|
|
edit2.DeleteFile(/* level */ 1, /* file_number */ 23);
|
|
|
|
|
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder builder(env_options, &ioptions_, table_cache, &vstorage_,
|
|
|
|
version_set);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.Apply(&edit));
|
|
|
|
ASSERT_OK(builder.Apply(&edit2));
|
|
|
|
|
|
|
|
constexpr bool force_consistency_checks = true;
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, &vstorage_,
|
|
|
|
force_consistency_checks);
|
|
|
|
|
|
|
|
ASSERT_OK(builder.SaveTo(&new_vstorage));
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
2020-06-12 18:52:14 +02:00
|
|
|
{
|
|
|
|
const auto& blob_files = new_vstorage.GetBlobFiles();
|
|
|
|
ASSERT_EQ(blob_files.size(), 5);
|
|
|
|
|
|
|
|
const std::vector<BlobFileMetaData::LinkedSsts> expected_linked_ssts{
|
|
|
|
{1, 21}, {2, 7}, {3, 8}, {4, 9}, {5, 10}};
|
|
|
|
|
|
|
|
for (size_t i = 0; i < 5; ++i) {
|
|
|
|
const auto meta =
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
new_vstorage.GetBlobFileMetaData(/* blob_file_number */ i + 1);
|
2020-06-12 18:52:14 +02:00
|
|
|
ASSERT_NE(meta, nullptr);
|
|
|
|
ASSERT_EQ(meta->GetLinkedSsts(), expected_linked_ssts[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make sure that no new BlobFileMetaData got created for the blob file
|
|
|
|
// affected by the trivial move.
|
Use a sorted vector instead of a map to store blob file metadata (#9526)
Summary:
The patch replaces `std::map` with a sorted `std::vector` for
`VersionStorageInfo::blob_files_` and preallocates the space
for the `vector` before saving the `BlobFileMetaData` into the
new `VersionStorageInfo` in `VersionBuilder::Rep::SaveBlobFilesTo`.
These changes reduce the time the DB mutex is held while
saving new `Version`s, and using a sorted `vector` also makes
lookups faster thanks to better memory locality.
In addition, the patch introduces helper methods
`VersionStorageInfo::GetBlobFileMetaData` and
`VersionStorageInfo::GetBlobFileMetaDataLB` that can be used by
clients to perform lookups in the `vector`, and does some general
cleanup in the parts of code where blob file metadata are used.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9526
Test Plan:
Ran `make check` and the crash test script for a while.
Performance was tested using a load-optimized benchmark (`fillseq` with vector memtable, no WAL) and small file sizes so that a significant number of files are produced:
```
numactl --interleave=all ./db_bench --benchmarks=fillseq --allow_concurrent_memtable_write=false --level0_file_num_compaction_trigger=4 --level0_slowdown_writes_trigger=20 --level0_stop_writes_trigger=30 --max_background_jobs=8 --max_write_buffer_number=8 --db=/data/ltamasi-dbbench --wal_dir=/data/ltamasi-dbbench --num=800000000 --num_levels=8 --key_size=20 --value_size=400 --block_size=8192 --cache_size=51539607552 --cache_numshardbits=6 --compression_max_dict_bytes=0 --compression_ratio=0.5 --compression_type=lz4 --bytes_per_sync=8388608 --cache_index_and_filter_blocks=1 --cache_high_pri_pool_ratio=0.5 --benchmark_write_rate_limit=0 --write_buffer_size=16777216 --target_file_size_base=16777216 --max_bytes_for_level_base=67108864 --verify_checksum=1 --delete_obsolete_files_period_micros=62914560 --max_bytes_for_level_multiplier=8 --statistics=0 --stats_per_interval=1 --stats_interval_seconds=20 --histogram=1 --memtablerep=skip_list --bloom_bits=10 --open_files=-1 --subcompactions=1 --compaction_style=0 --min_level_to_compress=3 --level_compaction_dynamic_level_bytes=true --pin_l0_filter_and_index_blocks_in_cache=1 --soft_pending_compaction_bytes_limit=167503724544 --hard_pending_compaction_bytes_limit=335007449088 --min_level_to_compress=0 --use_existing_db=0 --sync=0 --threads=1 --memtablerep=vector --allow_concurrent_memtable_write=false --disable_wal=1 --enable_blob_files=1 --blob_file_size=16777216 --min_blob_size=0 --blob_compression_type=lz4 --enable_blob_garbage_collection=1 --seed=<some value>
```
Final statistics before the patch:
```
Cumulative writes: 0 writes, 700M keys, 0 commit groups, 0.0 writes per commit group, ingest: 284.62 GB, 121.27 MB/s
Interval writes: 0 writes, 334K keys, 0 commit groups, 0.0 writes per commit group, ingest: 139.28 MB, 72.46 MB/s
```
With the patch:
```
Cumulative writes: 0 writes, 760M keys, 0 commit groups, 0.0 writes per commit group, ingest: 308.66 GB, 131.52 MB/s
Interval writes: 0 writes, 445K keys, 0 commit groups, 0.0 writes per commit group, ingest: 185.35 MB, 93.15 MB/s
```
Total time to complete the benchmark is 2611 seconds with the patch, down from 2986 secs.
Reviewed By: riversand963
Differential Revision: D34082728
Pulled By: ltamasi
fbshipit-source-id: fc598abf676dce436734d06bb9d2d99a26a004fc
2022-02-09 21:35:39 +01:00
|
|
|
ASSERT_EQ(new_vstorage.GetBlobFileMetaData(/* blob_file_number */ 3),
|
|
|
|
meta3);
|
2020-06-12 18:52:14 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
}
|
|
|
|
|
2020-05-05 23:52:18 +02:00
|
|
|
TEST_F(VersionBuilderTest, CheckConsistencyForFileDeletedTwice) {
|
|
|
|
Add(0, 1U, "150", "200", 100U);
|
2022-02-04 17:18:18 +01:00
|
|
|
|
2020-05-05 23:52:18 +02:00
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
VersionEdit version_edit;
|
|
|
|
version_edit.DeleteFile(0, 1U);
|
|
|
|
|
|
|
|
EnvOptions env_options;
|
|
|
|
constexpr TableCache* table_cache = nullptr;
|
|
|
|
constexpr VersionSet* version_set = nullptr;
|
|
|
|
|
|
|
|
VersionBuilder version_builder(env_options, &ioptions_, table_cache,
|
|
|
|
&vstorage_, version_set);
|
|
|
|
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, nullptr,
|
|
|
|
true /* force_consistency_checks */);
|
|
|
|
ASSERT_OK(version_builder.Apply(&version_edit));
|
|
|
|
ASSERT_OK(version_builder.SaveTo(&new_vstorage));
|
|
|
|
|
2022-02-04 17:18:18 +01:00
|
|
|
UpdateVersionStorageInfo(&new_vstorage);
|
|
|
|
|
2020-05-05 23:52:18 +02:00
|
|
|
VersionBuilder version_builder2(env_options, &ioptions_, table_cache,
|
|
|
|
&new_vstorage, version_set);
|
|
|
|
VersionStorageInfo new_vstorage2(&icmp_, ucmp_, options_.num_levels,
|
|
|
|
kCompactionStyleLevel, nullptr,
|
|
|
|
true /* force_consistency_checks */);
|
|
|
|
ASSERT_NOK(version_builder2.Apply(&version_edit));
|
|
|
|
|
|
|
|
UnrefFilesInVersion(&new_vstorage);
|
|
|
|
UnrefFilesInVersion(&new_vstorage2);
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(VersionBuilderTest, EstimatedActiveKeys) {
|
2014-11-12 00:22:06 +01:00
|
|
|
const uint32_t kTotalSamples = 20;
|
|
|
|
const uint32_t kNumLevels = 5;
|
|
|
|
const uint32_t kFilesPerLevel = 8;
|
|
|
|
const uint32_t kNumFiles = kNumLevels * kFilesPerLevel;
|
|
|
|
const uint32_t kEntriesPerFile = 1000;
|
|
|
|
const uint32_t kDeletionsPerFile = 100;
|
|
|
|
for (uint32_t i = 0; i < kNumFiles; ++i) {
|
|
|
|
Add(static_cast<int>(i / kFilesPerLevel), i + 1,
|
2022-05-06 22:03:58 +02:00
|
|
|
std::to_string((i + 100) * 1000).c_str(),
|
|
|
|
std::to_string((i + 100) * 1000 + 999).c_str(), 100U, 0, 100, 100,
|
|
|
|
kEntriesPerFile, kDeletionsPerFile, (i < kTotalSamples));
|
2014-11-11 23:28:18 +01:00
|
|
|
}
|
|
|
|
// minus 2X for the number of deletion entries because:
|
|
|
|
// 1x for deletion entry does not count as a data entry.
|
|
|
|
// 1x for each deletion entry will actually remove one data entry.
|
|
|
|
ASSERT_EQ(vstorage_.GetEstimatedActiveKeys(),
|
|
|
|
(kEntriesPerFile - 2 * kDeletionsPerFile) * kNumFiles);
|
|
|
|
}
|
|
|
|
|
2020-02-20 21:07:53 +01:00
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
2014-10-31 16:48:19 +01:00
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
int main(int argc, char** argv) {
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|