Compare commits
15 Commits
main
...
blob_shado
Author | SHA1 | Date | |
---|---|---|---|
|
808333e6f0 | ||
|
f66da2e1ab | ||
|
004a02ee10 | ||
|
258549b739 | ||
|
6d4ef9e839 | ||
|
af9746fd22 | ||
|
5efc4bd8ea | ||
|
1e21dcc833 | ||
|
f7285ce5be | ||
|
76698fe15e | ||
|
9196e80b15 | ||
|
243ca14116 | ||
|
1c3c1c4340 | ||
|
3e40def2c1 | ||
|
766f062bbb |
@ -169,7 +169,7 @@ if(PORTABLE)
|
||||
# MSVC does not need a separate compiler flag to enable SSE4.2; if nmmintrin.h
|
||||
# is available, it is available by default.
|
||||
if(FORCE_SSE42 AND NOT MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mpclmul")
|
||||
endif()
|
||||
else()
|
||||
if(MSVC)
|
||||
@ -181,13 +181,18 @@ endif()
|
||||
|
||||
include(CheckCXXSourceCompiles)
|
||||
if(NOT MSVC)
|
||||
set(CMAKE_REQUIRED_FLAGS "-msse4.2")
|
||||
set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul")
|
||||
endif()
|
||||
CHECK_CXX_SOURCE_COMPILES("
|
||||
#include <cstdint>
|
||||
#include <nmmintrin.h>
|
||||
#include <wmmintrin.h>
|
||||
int main() {
|
||||
volatile uint32_t x = _mm_crc32_u32(0, 0);
|
||||
const auto a = _mm_set_epi64x(0, 0);
|
||||
const auto b = _mm_set_epi64x(0, 0);
|
||||
const auto c = _mm_clmulepi64_si128(a, b, 0x00);
|
||||
auto d = _mm_cvtsi128_si64(c);
|
||||
}
|
||||
" HAVE_SSE42)
|
||||
unset(CMAKE_REQUIRED_FLAGS)
|
||||
@ -608,7 +613,7 @@ if(HAVE_SSE42 AND NOT FORCE_SSE42)
|
||||
if(NOT MSVC)
|
||||
set_source_files_properties(
|
||||
util/crc32c.cc
|
||||
PROPERTIES COMPILE_FLAGS "-msse4.2")
|
||||
PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -1,9 +1,12 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
|
||||
## 5.10.0 (12/11/2017)
|
||||
### Public API Change
|
||||
* When running `make` with environment variable `USE_SSE` set and `PORTABLE` unset, will use all machine features available locally. Previously this combination only compiled SSE-related features.
|
||||
|
||||
### New Features
|
||||
* CRC32C is now using the 3-way pipelined SSE algorithm `crc32c_3way` on supported platforms to improve performance. The system will choose to use this algorithm on supported platforms automatically whenever possible. If PCLMULQDQ is not supported it will fall back to the old Fast_CRC32 algorithm.
|
||||
* Provide lifetime hints when writing files on Linux. This reduces hardware write-amp on storage devices supporting multiple streams.
|
||||
* Add a DB stat, `NUMBER_ITER_SKIP`, which returns how many internal keys were skipped during iterations (e.g., due to being tombstones or duplicate versions of a key).
|
||||
* Add PerfContext counters, `key_lock_wait_count` and `key_lock_wait_time`, which measure the number of times transactions wait on key locks and total amount of time waiting.
|
||||
|
3
Makefile
3
Makefile
@ -305,6 +305,9 @@ LDFLAGS += $(LUA_LIB)
|
||||
|
||||
endif
|
||||
|
||||
ifeq ($(NO_THREEWAY_CRC32C), 1)
|
||||
CXXFLAGS += -DNO_THREEWAY_CRC32C
|
||||
endif
|
||||
|
||||
CFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
|
||||
CXXFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers
|
||||
|
@ -484,10 +484,10 @@ if test -z "$PORTABLE"; then
|
||||
elif [ "$TARGET_OS" != AIX ] && [ "$TARGET_OS" != SunOS ]; then
|
||||
COMMON_FLAGS="$COMMON_FLAGS -march=native "
|
||||
elif test "$USE_SSE"; then
|
||||
COMMON_FLAGS="$COMMON_FLAGS -msse4.2"
|
||||
COMMON_FLAGS="$COMMON_FLAGS -msse4.2 -mpclmul"
|
||||
fi
|
||||
elif test "$USE_SSE"; then
|
||||
COMMON_FLAGS="$COMMON_FLAGS -msse4.2"
|
||||
COMMON_FLAGS="$COMMON_FLAGS -msse4.2 -mpclmul"
|
||||
fi
|
||||
|
||||
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
|
||||
@ -501,6 +501,24 @@ if [ "$?" = 0 ]; then
|
||||
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_SSE42"
|
||||
elif test "$USE_SSE"; then
|
||||
echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
|
||||
#include <cstdint>
|
||||
#include <wmmintrin.h>
|
||||
int main() {
|
||||
const auto a = _mm_set_epi64x(0, 0);
|
||||
const auto b = _mm_set_epi64x(0, 0);
|
||||
const auto c = _mm_clmulepi64_si128(a, b, 0x00);
|
||||
auto d = _mm_cvtsi128_si64(c);
|
||||
}
|
||||
EOF
|
||||
if [ "$?" = 0 ]; then
|
||||
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_PCLMUL"
|
||||
elif test "$USE_SSE"; then
|
||||
echo "warning: USE_SSE specified but compiler could not use PCLMUL intrinsics, disabling"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# iOS doesn't support thread-local storage, but this check would erroneously
|
||||
|
@ -625,7 +625,8 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
|
||||
"[%s] compacted to: %s, MB/sec: %.1f rd, %.1f wr, level %d, "
|
||||
"files in(%d, %d) out(%d) "
|
||||
"MB in(%.1f, %.1f) out(%.1f), read-write-amplify(%.1f) "
|
||||
"write-amplify(%.1f) %s, records in: %d, records dropped: %d\n",
|
||||
"write-amplify(%.1f) %s, records in: %d, records dropped: %d "
|
||||
"output_compression: %s\n",
|
||||
cfd->GetName().c_str(), vstorage->LevelSummary(&tmp), bytes_read_per_sec,
|
||||
bytes_written_per_sec, compact_->compaction->output_level(),
|
||||
stats.num_input_files_in_non_output_levels,
|
||||
@ -634,20 +635,23 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
|
||||
stats.bytes_read_output_level / 1048576.0,
|
||||
stats.bytes_written / 1048576.0, read_write_amp, write_amp,
|
||||
status.ToString().c_str(), stats.num_input_records,
|
||||
stats.num_dropped_records);
|
||||
stats.num_dropped_records,
|
||||
CompressionTypeToString(compact_->compaction->output_compression())
|
||||
.c_str());
|
||||
|
||||
UpdateCompactionJobStats(stats);
|
||||
|
||||
auto stream = event_logger_->LogToBuffer(log_buffer_);
|
||||
stream << "job" << job_id_
|
||||
<< "event" << "compaction_finished"
|
||||
stream << "job" << job_id_ << "event"
|
||||
<< "compaction_finished"
|
||||
<< "compaction_time_micros" << compaction_stats_.micros
|
||||
<< "output_level" << compact_->compaction->output_level()
|
||||
<< "num_output_files" << compact_->NumOutputFiles()
|
||||
<< "total_output_size" << compact_->total_bytes
|
||||
<< "num_input_records" << compact_->num_input_records
|
||||
<< "num_output_records" << compact_->num_output_records
|
||||
<< "num_subcompactions" << compact_->sub_compact_states.size();
|
||||
<< "total_output_size" << compact_->total_bytes << "num_input_records"
|
||||
<< compact_->num_input_records << "num_output_records"
|
||||
<< compact_->num_output_records << "num_subcompactions"
|
||||
<< compact_->sub_compact_states.size() << "output_compression"
|
||||
<< CompressionTypeToString(compact_->compaction->output_compression());
|
||||
|
||||
if (compaction_job_stats_ != nullptr) {
|
||||
stream << "num_single_delete_mismatches"
|
||||
|
@ -1682,12 +1682,6 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
|
||||
delete casted_s;
|
||||
}
|
||||
|
||||
bool DBImpl::HasActiveSnapshotInRange(SequenceNumber lower_bound,
|
||||
SequenceNumber upper_bound) {
|
||||
InstrumentedMutexLock l(&mutex_);
|
||||
return snapshots_.HasSnapshotInRange(lower_bound, upper_bound);
|
||||
}
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
Status DBImpl::GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
|
||||
TablePropertiesCollection* props) {
|
||||
|
@ -229,10 +229,6 @@ class DBImpl : public DB {
|
||||
|
||||
virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) override;
|
||||
|
||||
// Whether there is an active snapshot in range [lower_bound, upper_bound).
|
||||
bool HasActiveSnapshotInRange(SequenceNumber lower_bound,
|
||||
SequenceNumber upper_bound);
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
using DB::ResetStats;
|
||||
virtual Status ResetStats() override;
|
||||
|
@ -39,52 +39,6 @@ TEST_P(DBWriteTest, SyncAndDisableWAL) {
|
||||
ASSERT_TRUE(dbfull()->Write(write_options, &batch).IsInvalidArgument());
|
||||
}
|
||||
|
||||
// Sequence number should be return through input write batch.
|
||||
TEST_P(DBWriteTest, ReturnSeuqneceNumber) {
|
||||
Random rnd(4422);
|
||||
Open();
|
||||
for (int i = 0; i < 100; i++) {
|
||||
WriteBatch batch;
|
||||
batch.Put("key" + ToString(i), test::RandomHumanReadableString(&rnd, 10));
|
||||
ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(),
|
||||
WriteBatchInternal::Sequence(&batch));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(DBWriteTest, ReturnSeuqneceNumberMultiThreaded) {
|
||||
constexpr size_t kThreads = 16;
|
||||
constexpr size_t kNumKeys = 1000;
|
||||
Open();
|
||||
ASSERT_EQ(0, dbfull()->GetLatestSequenceNumber());
|
||||
// Check each sequence is used once and only once.
|
||||
std::vector<std::atomic_flag> flags(kNumKeys * kThreads + 1);
|
||||
for (size_t i = 0; i < flags.size(); i++) {
|
||||
flags[i].clear();
|
||||
}
|
||||
auto writer = [&](size_t id) {
|
||||
Random rnd(4422 + static_cast<uint32_t>(id));
|
||||
for (size_t k = 0; k < kNumKeys; k++) {
|
||||
WriteBatch batch;
|
||||
batch.Put("key" + ToString(id) + "-" + ToString(k),
|
||||
test::RandomHumanReadableString(&rnd, 10));
|
||||
ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
|
||||
SequenceNumber sequence = WriteBatchInternal::Sequence(&batch);
|
||||
ASSERT_GT(sequence, 0);
|
||||
ASSERT_LE(sequence, kNumKeys * kThreads);
|
||||
// The sequence isn't consumed by someone else.
|
||||
ASSERT_FALSE(flags[sequence].test_and_set());
|
||||
}
|
||||
};
|
||||
std::vector<port::Thread> threads;
|
||||
for (size_t i = 0; i < kThreads; i++) {
|
||||
threads.emplace_back(writer, i);
|
||||
}
|
||||
for (size_t i = 0; i < kThreads; i++) {
|
||||
threads[i].join();
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(DBWriteTest, IOErrorOnWALWritePropagateToWriteThreadFollower) {
|
||||
constexpr int kNumThreads = 5;
|
||||
std::unique_ptr<FaultInjectionTestEnv> mock_env(
|
||||
|
@ -209,6 +209,8 @@ Status FlushJob::Run(FileMetaData* file_meta) {
|
||||
auto stream = event_logger_->LogToBuffer(log_buffer_);
|
||||
stream << "job" << job_context_->job_id << "event"
|
||||
<< "flush_finished";
|
||||
stream << "output_compression"
|
||||
<< CompressionTypeToString(output_compression_);
|
||||
stream << "lsm_state";
|
||||
stream.StartArray();
|
||||
auto vstorage = cfd_->current()->storage_info();
|
||||
|
@ -108,22 +108,6 @@ class SnapshotList {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Whether there is an active snapshot in range [lower_bound, upper_bound).
|
||||
bool HasSnapshotInRange(SequenceNumber lower_bound,
|
||||
SequenceNumber upper_bound) {
|
||||
if (empty()) {
|
||||
return false;
|
||||
}
|
||||
const SnapshotImpl* s = &list_;
|
||||
while (s->next_ != &list_) {
|
||||
if (s->next_->number_ >= lower_bound) {
|
||||
return s->next_->number_ < upper_bound;
|
||||
}
|
||||
s = s->next_;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// get the sequence number of the most recent snapshot
|
||||
SequenceNumber GetNewest() {
|
||||
if (empty()) {
|
||||
|
@ -980,10 +980,12 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
||||
storage_info_.num_non_empty_levels_, &storage_info_.file_indexer_,
|
||||
user_comparator(), internal_comparator());
|
||||
FdWithKeyRange* f = fp.GetNextFile();
|
||||
|
||||
while (f != nullptr) {
|
||||
if (get_context.sample()) {
|
||||
sample_file_read_inc(f->file_metadata);
|
||||
}
|
||||
|
||||
*status = table_cache_->Get(
|
||||
read_options, *internal_comparator(), f->fd, ikey, &get_context,
|
||||
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
||||
@ -995,10 +997,21 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
||||
return;
|
||||
}
|
||||
|
||||
// report the counters before returning
|
||||
if (get_context.State() != GetContext::kNotFound &&
|
||||
get_context.State() != GetContext::kMerge) {
|
||||
for (uint32_t t = 0; t < Tickers::TICKER_ENUM_MAX; t++) {
|
||||
if (get_context.tickers_value[t] > 0) {
|
||||
RecordTick(db_statistics_, t, get_context.tickers_value[t]);
|
||||
}
|
||||
}
|
||||
}
|
||||
switch (get_context.State()) {
|
||||
case GetContext::kNotFound:
|
||||
// Keep searching in other files
|
||||
break;
|
||||
case GetContext::kMerge:
|
||||
break;
|
||||
case GetContext::kFound:
|
||||
if (fp.GetHitFileLevel() == 0) {
|
||||
RecordTick(db_statistics_, GET_HIT_L0);
|
||||
@ -1015,8 +1028,6 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
||||
case GetContext::kCorrupt:
|
||||
*status = Status::Corruption("corrupted key for ", user_key);
|
||||
return;
|
||||
case GetContext::kMerge:
|
||||
break;
|
||||
case GetContext::kBlobIndex:
|
||||
ROCKS_LOG_ERROR(info_log_, "Encounter unexpected blob index.");
|
||||
*status = Status::NotSupported(
|
||||
@ -1027,6 +1038,11 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
||||
f = fp.GetNextFile();
|
||||
}
|
||||
|
||||
for (uint32_t t = 0; t < Tickers::TICKER_ENUM_MAX; t++) {
|
||||
if (get_context.tickers_value[t] > 0) {
|
||||
RecordTick(db_statistics_, t, get_context.tickers_value[t]);
|
||||
}
|
||||
}
|
||||
if (GetContext::kMerge == get_context.State()) {
|
||||
if (!merge_operator_) {
|
||||
*status = Status::InvalidArgument(
|
||||
|
@ -415,7 +415,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
||||
{BLOB_DB_WRITE_BLOB, "rocksdb.blobdb.write.blob"},
|
||||
{BLOB_DB_WRITE_BLOB_TTL, "rocksdb.blobdb.write.blob.ttl"},
|
||||
{BLOB_DB_BLOB_FILE_BYTES_WRITTEN, "rocksdb.blobdb.blob.file.bytes.written"},
|
||||
{BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file,bytes.read"},
|
||||
{BLOB_DB_BLOB_FILE_BYTES_READ, "rocksdb.blobdb.blob.file.bytes.read"},
|
||||
{BLOB_DB_BLOB_FILE_SYNCED, "rocksdb.blobdb.blob.file.synced"},
|
||||
{BLOB_DB_BLOB_INDEX_EXPIRED, "rocksdb.blobdb.blob.index.expired"},
|
||||
{BLOB_DB_GC_NUM_FILES, "rocksdb.blobdb.gc.num.files"},
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "rocksdb/db.h"
|
||||
|
||||
@ -18,11 +19,20 @@ namespace rocksdb {
|
||||
// This class contains APIs to stack rocksdb wrappers.Eg. Stack TTL over base d
|
||||
class StackableDB : public DB {
|
||||
public:
|
||||
// StackableDB is the owner of db now!
|
||||
// StackableDB take sole ownership of the underlying db.
|
||||
explicit StackableDB(DB* db) : db_(db) {}
|
||||
|
||||
// StackableDB take shared ownership of the underlying db.
|
||||
explicit StackableDB(std::shared_ptr<DB> db)
|
||||
: db_(db.get()), shared_db_ptr_(db) {}
|
||||
|
||||
~StackableDB() {
|
||||
delete db_;
|
||||
if (shared_db_ptr_ == nullptr) {
|
||||
delete db_;
|
||||
} else {
|
||||
assert(shared_db_ptr_.get() == db_);
|
||||
}
|
||||
db_ = nullptr;
|
||||
}
|
||||
|
||||
virtual DB* GetBaseDB() {
|
||||
@ -373,6 +383,7 @@ class StackableDB : public DB {
|
||||
|
||||
protected:
|
||||
DB* db_;
|
||||
std::shared_ptr<DB> shared_db_ptr_;
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -5,7 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#define ROCKSDB_MAJOR 5
|
||||
#define ROCKSDB_MINOR 9
|
||||
#define ROCKSDB_MINOR 10
|
||||
#define ROCKSDB_PATCH 0
|
||||
|
||||
// Do not use these. We made the mistake of declaring macros starting with
|
||||
|
@ -527,11 +527,11 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents,
|
||||
RecordTick(r->ioptions.statistics, NUMBER_BLOCK_NOT_COMPRESSED);
|
||||
type = kNoCompression;
|
||||
block_contents = raw_block_contents;
|
||||
} else if (type != kNoCompression &&
|
||||
ShouldReportDetailedTime(r->ioptions.env,
|
||||
r->ioptions.statistics)) {
|
||||
MeasureTime(r->ioptions.statistics, COMPRESSION_TIMES_NANOS,
|
||||
timer.ElapsedNanos());
|
||||
} else if (type != kNoCompression) {
|
||||
if (ShouldReportDetailedTime(r->ioptions.env, r->ioptions.statistics)) {
|
||||
MeasureTime(r->ioptions.statistics, COMPRESSION_TIMES_NANOS,
|
||||
timer.ElapsedNanos());
|
||||
}
|
||||
MeasureTime(r->ioptions.statistics, BYTES_COMPRESSED,
|
||||
raw_block_contents.size());
|
||||
RecordTick(r->ioptions.statistics, NUMBER_BLOCK_COMPRESSED);
|
||||
|
@ -126,22 +126,37 @@ Slice GetCacheKeyFromOffset(const char* cache_key_prefix,
|
||||
Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key,
|
||||
Tickers block_cache_miss_ticker,
|
||||
Tickers block_cache_hit_ticker,
|
||||
Statistics* statistics) {
|
||||
Statistics* statistics,
|
||||
GetContext* get_context) {
|
||||
auto cache_handle = block_cache->Lookup(key, statistics);
|
||||
if (cache_handle != nullptr) {
|
||||
PERF_COUNTER_ADD(block_cache_hit_count, 1);
|
||||
// overall cache hit
|
||||
RecordTick(statistics, BLOCK_CACHE_HIT);
|
||||
// total bytes read from cache
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_READ,
|
||||
block_cache->GetUsage(cache_handle));
|
||||
// block-type specific cache hit
|
||||
RecordTick(statistics, block_cache_hit_ticker);
|
||||
if (get_context != nullptr) {
|
||||
// overall cache hit
|
||||
get_context->RecordCounters(BLOCK_CACHE_HIT, 1);
|
||||
// total bytes read from cache
|
||||
get_context->RecordCounters(BLOCK_CACHE_BYTES_READ,
|
||||
block_cache->GetUsage(cache_handle));
|
||||
// block-type specific cache hit
|
||||
get_context->RecordCounters(block_cache_hit_ticker, 1);
|
||||
} else {
|
||||
// overall cache hit
|
||||
RecordTick(statistics, BLOCK_CACHE_HIT);
|
||||
// total bytes read from cache
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_READ,
|
||||
block_cache->GetUsage(cache_handle));
|
||||
RecordTick(statistics, block_cache_hit_ticker);
|
||||
}
|
||||
} else {
|
||||
// overall cache miss
|
||||
RecordTick(statistics, BLOCK_CACHE_MISS);
|
||||
// block-type specific cache miss
|
||||
RecordTick(statistics, block_cache_miss_ticker);
|
||||
if (get_context != nullptr) {
|
||||
// overall cache miss
|
||||
get_context->RecordCounters(BLOCK_CACHE_MISS, 1);
|
||||
// block-type specific cache miss
|
||||
get_context->RecordCounters(block_cache_miss_ticker, 1);
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_MISS);
|
||||
RecordTick(statistics, block_cache_miss_ticker);
|
||||
}
|
||||
}
|
||||
|
||||
return cache_handle;
|
||||
@ -253,9 +268,11 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
|
||||
compression_dict = rep->compression_dict_block->data;
|
||||
}
|
||||
const bool is_index = true;
|
||||
s = table_->MaybeLoadDataBlockToCache(prefetch_buffer.get(), rep, ro,
|
||||
handle, compression_dict, &block,
|
||||
is_index);
|
||||
// TODO: Support counter batch update for partitioned index and
|
||||
// filter blocks
|
||||
s = table_->MaybeLoadDataBlockToCache(
|
||||
prefetch_buffer.get(), rep, ro, handle, compression_dict, &block,
|
||||
is_index, nullptr /* get_context */);
|
||||
|
||||
assert(s.ok() || block.value == nullptr);
|
||||
if (s.ok() && block.value != nullptr) {
|
||||
@ -779,7 +796,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
||||
ReadOptions read_options;
|
||||
s = MaybeLoadDataBlockToCache(
|
||||
prefetch_buffer.get(), rep, read_options, rep->range_del_handle,
|
||||
Slice() /* compression_dict */, &rep->range_del_entry);
|
||||
Slice() /* compression_dict */, &rep->range_del_entry,
|
||||
false /* is_index */, nullptr /* get_context */);
|
||||
if (!s.ok()) {
|
||||
ROCKS_LOG_WARN(
|
||||
rep->ioptions.info_log,
|
||||
@ -955,8 +973,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
||||
Cache* block_cache, Cache* block_cache_compressed,
|
||||
const ImmutableCFOptions& ioptions, const ReadOptions& read_options,
|
||||
BlockBasedTable::CachableEntry<Block>* block, uint32_t format_version,
|
||||
const Slice& compression_dict, size_t read_amp_bytes_per_bit,
|
||||
bool is_index) {
|
||||
const Slice& compression_dict, size_t read_amp_bytes_per_bit, bool is_index,
|
||||
GetContext* get_context) {
|
||||
Status s;
|
||||
Block* compressed_block = nullptr;
|
||||
Cache::Handle* block_cache_compressed_handle = nullptr;
|
||||
@ -967,7 +985,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
||||
block->cache_handle = GetEntryFromCache(
|
||||
block_cache, block_cache_key,
|
||||
is_index ? BLOCK_CACHE_INDEX_MISS : BLOCK_CACHE_DATA_MISS,
|
||||
is_index ? BLOCK_CACHE_INDEX_HIT : BLOCK_CACHE_DATA_HIT, statistics);
|
||||
is_index ? BLOCK_CACHE_INDEX_HIT : BLOCK_CACHE_DATA_HIT, statistics,
|
||||
get_context);
|
||||
if (block->cache_handle != nullptr) {
|
||||
block->value =
|
||||
reinterpret_cast<Block*>(block_cache->Value(block->cache_handle));
|
||||
@ -1020,18 +1039,36 @@ Status BlockBasedTable::GetDataBlockFromCache(
|
||||
block_cache->TEST_mark_as_data_block(block_cache_key,
|
||||
block->value->usable_size());
|
||||
if (s.ok()) {
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||
if (is_index) {
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
if (get_context != nullptr) {
|
||||
get_context->RecordCounters(BLOCK_CACHE_ADD, 1);
|
||||
get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE,
|
||||
block->value->usable_size());
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
|
||||
block->value->usable_size());
|
||||
}
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
|
||||
block->value->usable_size());
|
||||
if (is_index) {
|
||||
if (get_context != nullptr) {
|
||||
get_context->RecordCounters(BLOCK_CACHE_INDEX_ADD, 1);
|
||||
get_context->RecordCounters(BLOCK_CACHE_INDEX_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
}
|
||||
} else {
|
||||
if (get_context != nullptr) {
|
||||
get_context->RecordCounters(BLOCK_CACHE_DATA_ADD, 1);
|
||||
get_context->RecordCounters(BLOCK_CACHE_DATA_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
|
||||
delete block->value;
|
||||
@ -1051,7 +1088,7 @@ Status BlockBasedTable::PutDataBlockToCache(
|
||||
const ReadOptions& read_options, const ImmutableCFOptions& ioptions,
|
||||
CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
|
||||
const Slice& compression_dict, size_t read_amp_bytes_per_bit, bool is_index,
|
||||
Cache::Priority priority) {
|
||||
Cache::Priority priority, GetContext* get_context) {
|
||||
assert(raw_block->compression_type() == kNoCompression ||
|
||||
block_cache_compressed != nullptr);
|
||||
|
||||
@ -1104,18 +1141,36 @@ Status BlockBasedTable::PutDataBlockToCache(
|
||||
block->value->usable_size());
|
||||
if (s.ok()) {
|
||||
assert(block->cache_handle != nullptr);
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||
if (is_index) {
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
if (get_context != nullptr) {
|
||||
get_context->RecordCounters(BLOCK_CACHE_ADD, 1);
|
||||
get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE,
|
||||
block->value->usable_size());
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
|
||||
block->value->usable_size());
|
||||
}
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
|
||||
block->value->usable_size());
|
||||
if (is_index) {
|
||||
if (get_context != nullptr) {
|
||||
get_context->RecordCounters(BLOCK_CACHE_INDEX_ADD, 1);
|
||||
get_context->RecordCounters(BLOCK_CACHE_INDEX_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
}
|
||||
} else {
|
||||
if (get_context != nullptr) {
|
||||
get_context->RecordCounters(BLOCK_CACHE_DATA_ADD, 1);
|
||||
get_context->RecordCounters(BLOCK_CACHE_DATA_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
|
||||
block->value->usable_size());
|
||||
}
|
||||
}
|
||||
assert(reinterpret_cast<Block*>(
|
||||
block_cache->Value(block->cache_handle)) == block->value);
|
||||
} else {
|
||||
@ -1188,16 +1243,18 @@ FilterBlockReader* BlockBasedTable::ReadFilter(
|
||||
}
|
||||
|
||||
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
FilePrefetchBuffer* prefetch_buffer, bool no_io) const {
|
||||
FilePrefetchBuffer* prefetch_buffer, bool no_io,
|
||||
GetContext* get_context) const {
|
||||
const BlockHandle& filter_blk_handle = rep_->filter_handle;
|
||||
const bool is_a_filter_partition = true;
|
||||
return GetFilter(prefetch_buffer, filter_blk_handle, !is_a_filter_partition,
|
||||
no_io);
|
||||
no_io, get_context);
|
||||
}
|
||||
|
||||
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_blk_handle,
|
||||
const bool is_a_filter_partition, bool no_io) const {
|
||||
const bool is_a_filter_partition, bool no_io,
|
||||
GetContext* get_context) const {
|
||||
// If cache_index_and_filter_blocks is false, filter should be pre-populated.
|
||||
// We will return rep_->filter anyway. rep_->filter can be nullptr if filter
|
||||
// read fails at Open() time. We don't want to reload again since it will
|
||||
@ -1227,7 +1284,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
Statistics* statistics = rep_->ioptions.statistics;
|
||||
auto cache_handle =
|
||||
GetEntryFromCache(block_cache, key, BLOCK_CACHE_FILTER_MISS,
|
||||
BLOCK_CACHE_FILTER_HIT, statistics);
|
||||
BLOCK_CACHE_FILTER_HIT, statistics, get_context);
|
||||
|
||||
FilterBlockReader* filter = nullptr;
|
||||
if (cache_handle != nullptr) {
|
||||
@ -1247,10 +1304,19 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
? Cache::Priority::HIGH
|
||||
: Cache::Priority::LOW);
|
||||
if (s.ok()) {
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_FILTER_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_FILTER_BYTES_INSERT, filter->size());
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, filter->size());
|
||||
if (get_context != nullptr) {
|
||||
get_context->RecordCounters(BLOCK_CACHE_ADD, 1);
|
||||
get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE, filter->size());
|
||||
get_context->RecordCounters(BLOCK_CACHE_FILTER_ADD, 1);
|
||||
get_context->RecordCounters(BLOCK_CACHE_FILTER_BYTES_INSERT,
|
||||
filter->size());
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, filter->size());
|
||||
RecordTick(statistics, BLOCK_CACHE_FILTER_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_FILTER_BYTES_INSERT,
|
||||
filter->size());
|
||||
}
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
|
||||
delete filter;
|
||||
@ -1264,7 +1330,7 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
|
||||
InternalIterator* BlockBasedTable::NewIndexIterator(
|
||||
const ReadOptions& read_options, BlockIter* input_iter,
|
||||
CachableEntry<IndexReader>* index_entry) {
|
||||
CachableEntry<IndexReader>* index_entry, GetContext* get_context) {
|
||||
// index reader has already been pre-populated.
|
||||
if (rep_->index_reader) {
|
||||
return rep_->index_reader->NewIterator(
|
||||
@ -1287,7 +1353,7 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
|
||||
Statistics* statistics = rep_->ioptions.statistics;
|
||||
auto cache_handle =
|
||||
GetEntryFromCache(block_cache, key, BLOCK_CACHE_INDEX_MISS,
|
||||
BLOCK_CACHE_INDEX_HIT, statistics);
|
||||
BLOCK_CACHE_INDEX_HIT, statistics, get_context);
|
||||
|
||||
if (cache_handle == nullptr && no_io) {
|
||||
if (input_iter != nullptr) {
|
||||
@ -1322,10 +1388,15 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
|
||||
|
||||
if (s.ok()) {
|
||||
size_t usable_size = index_reader->usable_size();
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||
if (get_context != nullptr) {
|
||||
get_context->RecordCounters(BLOCK_CACHE_ADD, 1);
|
||||
get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE, usable_size);
|
||||
} else {
|
||||
RecordTick(statistics, BLOCK_CACHE_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, usable_size);
|
||||
}
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
|
||||
RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, usable_size);
|
||||
RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, usable_size);
|
||||
} else {
|
||||
if (index_reader != nullptr) {
|
||||
delete index_reader;
|
||||
@ -1359,13 +1430,14 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
|
||||
|
||||
InternalIterator* BlockBasedTable::NewDataBlockIterator(
|
||||
Rep* rep, const ReadOptions& ro, const Slice& index_value,
|
||||
BlockIter* input_iter, bool is_index) {
|
||||
BlockIter* input_iter, bool is_index, GetContext* get_context) {
|
||||
BlockHandle handle;
|
||||
Slice input = index_value;
|
||||
// We intentionally allow extra stuff in index_value so that we
|
||||
// can add more features in the future.
|
||||
Status s = handle.DecodeFrom(&input);
|
||||
return NewDataBlockIterator(rep, ro, handle, input_iter, is_index, s);
|
||||
return NewDataBlockIterator(rep, ro, handle, input_iter, is_index,
|
||||
get_context, s);
|
||||
}
|
||||
|
||||
// Convert an index iterator value (i.e., an encoded BlockHandle)
|
||||
@ -1374,7 +1446,7 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator(
|
||||
// If input_iter is not null, update this iter and return it
|
||||
InternalIterator* BlockBasedTable::NewDataBlockIterator(
|
||||
Rep* rep, const ReadOptions& ro, const BlockHandle& handle,
|
||||
BlockIter* input_iter, bool is_index, Status s) {
|
||||
BlockIter* input_iter, bool is_index, GetContext* get_context, Status s) {
|
||||
PERF_TIMER_GUARD(new_table_block_iter_nanos);
|
||||
|
||||
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
||||
@ -1386,7 +1458,8 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator(
|
||||
compression_dict = rep->compression_dict_block->data;
|
||||
}
|
||||
s = MaybeLoadDataBlockToCache(nullptr /*prefetch_buffer*/, rep, ro, handle,
|
||||
compression_dict, &block, is_index);
|
||||
compression_dict, &block, is_index,
|
||||
get_context);
|
||||
}
|
||||
|
||||
// Didn't get any data from block caches.
|
||||
@ -1437,7 +1510,7 @@ InternalIterator* BlockBasedTable::NewDataBlockIterator(
|
||||
Status BlockBasedTable::MaybeLoadDataBlockToCache(
|
||||
FilePrefetchBuffer* prefetch_buffer, Rep* rep, const ReadOptions& ro,
|
||||
const BlockHandle& handle, Slice compression_dict,
|
||||
CachableEntry<Block>* block_entry, bool is_index) {
|
||||
CachableEntry<Block>* block_entry, bool is_index, GetContext* get_context) {
|
||||
assert(block_entry != nullptr);
|
||||
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
||||
Cache* block_cache = rep->table_options.block_cache.get();
|
||||
@ -1468,7 +1541,7 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache(
|
||||
s = GetDataBlockFromCache(
|
||||
key, ckey, block_cache, block_cache_compressed, rep->ioptions, ro,
|
||||
block_entry, rep->table_options.format_version, compression_dict,
|
||||
rep->table_options.read_amp_bytes_per_bit, is_index);
|
||||
rep->table_options.read_amp_bytes_per_bit, is_index, get_context);
|
||||
|
||||
if (block_entry->value == nullptr && !no_io && ro.fill_cache) {
|
||||
std::unique_ptr<Block> raw_block;
|
||||
@ -1487,11 +1560,11 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache(
|
||||
block_entry, raw_block.release(), rep->table_options.format_version,
|
||||
compression_dict, rep->table_options.read_amp_bytes_per_bit,
|
||||
is_index,
|
||||
is_index &&
|
||||
rep->table_options
|
||||
.cache_index_and_filter_blocks_with_high_priority
|
||||
is_index && rep->table_options
|
||||
.cache_index_and_filter_blocks_with_high_priority
|
||||
? Cache::Priority::HIGH
|
||||
: Cache::Priority::LOW);
|
||||
: Cache::Priority::LOW,
|
||||
get_context);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1535,8 +1608,9 @@ BlockBasedTable::BlockEntryIteratorState::NewSecondaryIterator(
|
||||
&rep->internal_comparator, nullptr, true, rep->ioptions.statistics);
|
||||
}
|
||||
}
|
||||
return NewDataBlockIterator(rep, read_options_, handle, nullptr, is_index_,
|
||||
s);
|
||||
return NewDataBlockIterator(rep, read_options_, handle,
|
||||
/* input_iter */ nullptr, is_index_,
|
||||
/* get_context */ nullptr, s);
|
||||
}
|
||||
|
||||
bool BlockBasedTable::BlockEntryIteratorState::PrefixMayMatch(
|
||||
@ -1730,8 +1804,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
const bool no_io = read_options.read_tier == kBlockCacheTier;
|
||||
CachableEntry<FilterBlockReader> filter_entry;
|
||||
if (!skip_filters) {
|
||||
filter_entry = GetFilter(/*prefetch_buffer*/ nullptr,
|
||||
read_options.read_tier == kBlockCacheTier);
|
||||
filter_entry =
|
||||
GetFilter(/*prefetch_buffer*/ nullptr,
|
||||
read_options.read_tier == kBlockCacheTier, get_context);
|
||||
}
|
||||
FilterBlockReader* filter = filter_entry.value;
|
||||
|
||||
@ -1741,7 +1816,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
|
||||
} else {
|
||||
BlockIter iiter_on_stack;
|
||||
auto iiter = NewIndexIterator(read_options, &iiter_on_stack);
|
||||
auto iiter = NewIndexIterator(read_options, &iiter_on_stack,
|
||||
/* index_entry */ nullptr, get_context);
|
||||
std::unique_ptr<InternalIterator> iiter_unique_ptr;
|
||||
if (iiter != &iiter_on_stack) {
|
||||
iiter_unique_ptr.reset(iiter);
|
||||
@ -1765,7 +1841,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
||||
break;
|
||||
} else {
|
||||
BlockIter biter;
|
||||
NewDataBlockIterator(rep_, read_options, iiter->value(), &biter);
|
||||
NewDataBlockIterator(rep_, read_options, iiter->value(), &biter, false,
|
||||
get_context);
|
||||
|
||||
if (read_options.read_tier == kBlockCacheTier &&
|
||||
biter.status().IsIncomplete()) {
|
||||
|
@ -215,15 +215,14 @@ class BlockBasedTable : public TableReader {
|
||||
private:
|
||||
friend class MockedBlockBasedTable;
|
||||
// input_iter: if it is not null, update this one and return it as Iterator
|
||||
static InternalIterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro,
|
||||
const Slice& index_value,
|
||||
BlockIter* input_iter = nullptr,
|
||||
bool is_index = false);
|
||||
static InternalIterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro,
|
||||
const BlockHandle& block_hanlde,
|
||||
BlockIter* input_iter = nullptr,
|
||||
bool is_index = false,
|
||||
Status s = Status());
|
||||
static InternalIterator* NewDataBlockIterator(
|
||||
Rep* rep, const ReadOptions& ro, const Slice& index_value,
|
||||
BlockIter* input_iter = nullptr, bool is_index = false,
|
||||
GetContext* get_context = nullptr);
|
||||
static InternalIterator* NewDataBlockIterator(
|
||||
Rep* rep, const ReadOptions& ro, const BlockHandle& block_hanlde,
|
||||
BlockIter* input_iter = nullptr, bool is_index = false,
|
||||
GetContext* get_context = nullptr, Status s = Status());
|
||||
// If block cache enabled (compressed or uncompressed), looks for the block
|
||||
// identified by handle in (1) uncompressed cache, (2) compressed cache, and
|
||||
// then (3) file. If found, inserts into the cache(s) that were searched
|
||||
@ -238,16 +237,19 @@ class BlockBasedTable : public TableReader {
|
||||
const BlockHandle& handle,
|
||||
Slice compression_dict,
|
||||
CachableEntry<Block>* block_entry,
|
||||
bool is_index = false);
|
||||
bool is_index = false,
|
||||
GetContext* get_context = nullptr);
|
||||
|
||||
// For the following two functions:
|
||||
// if `no_io == true`, we will not try to read filter/index from sst file
|
||||
// were they not present in cache yet.
|
||||
CachableEntry<FilterBlockReader> GetFilter(
|
||||
FilePrefetchBuffer* prefetch_buffer = nullptr, bool no_io = false) const;
|
||||
FilePrefetchBuffer* prefetch_buffer = nullptr, bool no_io = false,
|
||||
GetContext* get_context = nullptr) const;
|
||||
virtual CachableEntry<FilterBlockReader> GetFilter(
|
||||
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_blk_handle,
|
||||
const bool is_a_filter_partition, bool no_io) const;
|
||||
const bool is_a_filter_partition, bool no_io,
|
||||
GetContext* get_context) const;
|
||||
|
||||
// Get the iterator from the index reader.
|
||||
// If input_iter is not set, return new Iterator
|
||||
@ -261,7 +263,8 @@ class BlockBasedTable : public TableReader {
|
||||
// kBlockCacheTier
|
||||
InternalIterator* NewIndexIterator(
|
||||
const ReadOptions& read_options, BlockIter* input_iter = nullptr,
|
||||
CachableEntry<IndexReader>* index_entry = nullptr);
|
||||
CachableEntry<IndexReader>* index_entry = nullptr,
|
||||
GetContext* get_context = nullptr);
|
||||
|
||||
// Read block cache from block caches (if set): block_cache and
|
||||
// block_cache_compressed.
|
||||
@ -275,7 +278,7 @@ class BlockBasedTable : public TableReader {
|
||||
const ImmutableCFOptions& ioptions, const ReadOptions& read_options,
|
||||
BlockBasedTable::CachableEntry<Block>* block, uint32_t format_version,
|
||||
const Slice& compression_dict, size_t read_amp_bytes_per_bit,
|
||||
bool is_index = false);
|
||||
bool is_index = false, GetContext* get_context = nullptr);
|
||||
|
||||
// Put a raw block (maybe compressed) to the corresponding block caches.
|
||||
// This method will perform decompression against raw_block if needed and then
|
||||
@ -293,7 +296,8 @@ class BlockBasedTable : public TableReader {
|
||||
const ReadOptions& read_options, const ImmutableCFOptions& ioptions,
|
||||
CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
|
||||
const Slice& compression_dict, size_t read_amp_bytes_per_bit,
|
||||
bool is_index = false, Cache::Priority pri = Cache::Priority::LOW);
|
||||
bool is_index = false, Cache::Priority pri = Cache::Priority::LOW,
|
||||
GetContext* get_context = nullptr);
|
||||
|
||||
// Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found
|
||||
// after a call to Seek(key), until handle_result returns false.
|
||||
|
@ -568,9 +568,9 @@ Status UncompressBlockContentsForCompressionType(
|
||||
if(ShouldReportDetailedTime(ioptions.env, ioptions.statistics)){
|
||||
MeasureTime(ioptions.statistics, DECOMPRESSION_TIMES_NANOS,
|
||||
timer.ElapsedNanos());
|
||||
MeasureTime(ioptions.statistics, BYTES_DECOMPRESSED, contents->data.size());
|
||||
RecordTick(ioptions.statistics, NUMBER_BLOCK_DECOMPRESSED);
|
||||
}
|
||||
MeasureTime(ioptions.statistics, BYTES_DECOMPRESSED, contents->data.size());
|
||||
RecordTick(ioptions.statistics, NUMBER_BLOCK_DECOMPRESSED);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -87,6 +87,13 @@ void GetContext::SaveValue(const Slice& value, SequenceNumber seq) {
|
||||
}
|
||||
}
|
||||
|
||||
void GetContext::RecordCounters(Tickers ticker, size_t val) {
|
||||
if (ticker == Tickers::TICKER_ENUM_MAX) {
|
||||
return;
|
||||
}
|
||||
tickers_value[ticker] += static_cast<uint64_t>(val);
|
||||
}
|
||||
|
||||
bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
|
||||
const Slice& value, Cleanable* value_pinner) {
|
||||
assert((state_ != kMerge && parsed_key.type != kTypeMerge) ||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "db/range_del_aggregator.h"
|
||||
#include "db/read_callback.h"
|
||||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/statistics.h"
|
||||
#include "rocksdb/types.h"
|
||||
#include "table/block.h"
|
||||
|
||||
@ -26,6 +27,7 @@ class GetContext {
|
||||
kMerge, // saver contains the current merge result (the operands)
|
||||
kBlobIndex,
|
||||
};
|
||||
uint64_t tickers_value[Tickers::TICKER_ENUM_MAX] = {0};
|
||||
|
||||
GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
|
||||
Logger* logger, Statistics* statistics, GetState init_state,
|
||||
@ -72,6 +74,8 @@ class GetContext {
|
||||
return true;
|
||||
}
|
||||
|
||||
void RecordCounters(Tickers ticker, size_t val);
|
||||
|
||||
private:
|
||||
const Comparator* ucmp_;
|
||||
const MergeOperator* merge_operator_;
|
||||
|
@ -231,7 +231,8 @@ PartitionedFilterBlockReader::GetFilterPartition(
|
||||
}
|
||||
}
|
||||
return table_->GetFilter(/*prefetch_buffer*/ nullptr, fltr_blk_handle,
|
||||
is_a_filter_partition, no_io);
|
||||
is_a_filter_partition, no_io,
|
||||
/* get_context */ nullptr);
|
||||
} else {
|
||||
auto filter = table_->ReadFilter(prefetch_buffer, fltr_blk_handle,
|
||||
is_a_filter_partition);
|
||||
@ -295,7 +296,8 @@ void PartitionedFilterBlockReader::CacheDependencies(bool pin) {
|
||||
const bool no_io = true;
|
||||
const bool is_a_filter_partition = true;
|
||||
auto filter = table_->GetFilter(prefetch_buffer.get(), handle,
|
||||
is_a_filter_partition, !no_io);
|
||||
is_a_filter_partition, !no_io,
|
||||
/* get_context */ nullptr);
|
||||
if (LIKELY(filter.IsSet())) {
|
||||
if (pin) {
|
||||
filter_map_[handle.offset()] = std::move(filter);
|
||||
|
@ -29,7 +29,8 @@ class MockedBlockBasedTable : public BlockBasedTable {
|
||||
|
||||
virtual CachableEntry<FilterBlockReader> GetFilter(
|
||||
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
|
||||
const bool /* unused */, bool /* unused */) const override {
|
||||
const bool /* unused */, bool /* unused */,
|
||||
GetContext* /* unused */) const override {
|
||||
Slice slice = slices[filter_blk_handle.offset()];
|
||||
auto obj = new FullFilterBlockReader(
|
||||
nullptr, true, BlockContents(slice, false, kNoCompression),
|
||||
|
@ -2752,8 +2752,10 @@ void VerifyDBFromDB(std::string& truth_db_name) {
|
||||
|
||||
void Crc32c(ThreadState* thread) {
|
||||
// Checksum about 500MB of data total
|
||||
const int size = 4096;
|
||||
const char* label = "(4K per op)";
|
||||
const int size = FLAGS_block_size; // use --block_size option for db_bench
|
||||
std::string labels = "(" + ToString(FLAGS_block_size) + " per op)";
|
||||
const char* label = labels.c_str();
|
||||
|
||||
std::string data(size, 'x');
|
||||
int64_t bytes = 0;
|
||||
uint32_t crc = 0;
|
||||
|
596
util/crc32c.cc
596
util/crc32c.cc
@ -9,12 +9,11 @@
|
||||
//
|
||||
// A portable implementation of crc32c, optimized to handle
|
||||
// four bytes at a time.
|
||||
|
||||
#include "util/crc32c.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#ifdef HAVE_SSE42
|
||||
#include <nmmintrin.h>
|
||||
#include <wmmintrin.h>
|
||||
#endif
|
||||
#include "util/coding.h"
|
||||
|
||||
@ -352,6 +351,7 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
|
||||
|
||||
template<void (*CRC32)(uint64_t*, uint8_t const**)>
|
||||
uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
|
||||
|
||||
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
|
||||
const uint8_t *e = p + size;
|
||||
uint64_t l = crc ^ 0xffffffffu;
|
||||
@ -395,13 +395,14 @@ uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
|
||||
|
||||
// Detect if SS42 or not.
|
||||
#ifndef HAVE_POWER8
|
||||
|
||||
static bool isSSE42() {
|
||||
#ifndef HAVE_SSE42
|
||||
return false;
|
||||
#elif defined(__GNUC__) && defined(__x86_64__) && !defined(IOS_CROSS_COMPILE)
|
||||
uint32_t c_;
|
||||
__asm__("cpuid" : "=c"(c_) : "a"(1) : "ebx", "edx");
|
||||
return c_ & (1U << 20); // copied from CpuId.h in Folly.
|
||||
return c_ & (1U << 20); // copied from CpuId.h in Folly. Test SSE42
|
||||
#elif defined(_WIN64)
|
||||
int info[4];
|
||||
__cpuidex(info, 0x00000001, 0);
|
||||
@ -410,7 +411,26 @@ static bool isSSE42() {
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool isPCLMULQDQ() {
|
||||
#ifndef HAVE_SSE42
|
||||
// in build_detect_platform we set this macro when both SSE42 and PCLMULQDQ are
|
||||
// supported by compiler
|
||||
return false;
|
||||
#elif defined(__GNUC__) && defined(__x86_64__) && !defined(IOS_CROSS_COMPILE)
|
||||
uint32_t c_;
|
||||
__asm__("cpuid" : "=c"(c_) : "a"(1) : "ebx", "edx");
|
||||
return c_ & (1U << 1); // PCLMULQDQ is in bit 1 (not bit 0)
|
||||
#elif defined(_WIN64)
|
||||
int info[4];
|
||||
__cpuidex(info, 0x00000001, 0);
|
||||
return (info[2] & ((int)1 << 1)) != 0;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // HAVE_POWER8
|
||||
|
||||
typedef uint32_t (*Function)(uint32_t, const char*, size_t);
|
||||
|
||||
@ -440,13 +460,6 @@ static bool isAltiVec() {
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline Function Choose_Extend() {
|
||||
#ifndef HAVE_POWER8
|
||||
return isSSE42() ? ExtendImpl<Fast_CRC32> : ExtendImpl<Slow_CRC32>;
|
||||
#else
|
||||
return isAltiVec() ? ExtendPPCImpl : ExtendImpl<Slow_CRC32>;
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string IsFastCrc32Supported() {
|
||||
bool has_fast_crc = false;
|
||||
@ -475,11 +488,572 @@ std::string IsFastCrc32Supported() {
|
||||
return fast_zero_msg;
|
||||
}
|
||||
|
||||
static Function ChosenExtend = Choose_Extend();
|
||||
|
||||
/*
|
||||
* Copyright 2016 Ferry Toth, Exalon Delft BV, The Netherlands
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the author be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
* Ferry Toth
|
||||
* ftoth@exalondelft.nl
|
||||
*
|
||||
* https://github.com/htot/crc32c
|
||||
*
|
||||
* Modified by Facebook
|
||||
*
|
||||
* Original intel whitepaper:
|
||||
* "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction"
|
||||
* https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
|
||||
*
|
||||
* This version is from the folly library, created by Dave Watson <davejwatson@fb.com>
|
||||
*
|
||||
*/
|
||||
#if defined HAVE_SSE42 && defined HAVE_PCLMUL
|
||||
|
||||
#define CRCtriplet(crc, buf, offset) \
|
||||
crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
|
||||
crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset)); \
|
||||
crc##2 = _mm_crc32_u64(crc##2, *(buf##2 + offset));
|
||||
|
||||
#define CRCduplet(crc, buf, offset) \
|
||||
crc##0 = _mm_crc32_u64(crc##0, *(buf##0 + offset)); \
|
||||
crc##1 = _mm_crc32_u64(crc##1, *(buf##1 + offset));
|
||||
|
||||
#define CRCsinglet(crc, buf, offset) \
|
||||
crc = _mm_crc32_u64(crc, *(uint64_t*)(buf + offset));
|
||||
|
||||
|
||||
// Numbers taken directly from intel whitepaper.
|
||||
// clang-format off
|
||||
const uint64_t clmul_constants[] = {
|
||||
0x14cd00bd6, 0x105ec76f0, 0x0ba4fc28e, 0x14cd00bd6,
|
||||
0x1d82c63da, 0x0f20c0dfe, 0x09e4addf8, 0x0ba4fc28e,
|
||||
0x039d3b296, 0x1384aa63a, 0x102f9b8a2, 0x1d82c63da,
|
||||
0x14237f5e6, 0x01c291d04, 0x00d3b6092, 0x09e4addf8,
|
||||
0x0c96cfdc0, 0x0740eef02, 0x18266e456, 0x039d3b296,
|
||||
0x0daece73e, 0x0083a6eec, 0x0ab7aff2a, 0x102f9b8a2,
|
||||
0x1248ea574, 0x1c1733996, 0x083348832, 0x14237f5e6,
|
||||
0x12c743124, 0x02ad91c30, 0x0b9e02b86, 0x00d3b6092,
|
||||
0x018b33a4e, 0x06992cea2, 0x1b331e26a, 0x0c96cfdc0,
|
||||
0x17d35ba46, 0x07e908048, 0x1bf2e8b8a, 0x18266e456,
|
||||
0x1a3e0968a, 0x11ed1f9d8, 0x0ce7f39f4, 0x0daece73e,
|
||||
0x061d82e56, 0x0f1d0f55e, 0x0d270f1a2, 0x0ab7aff2a,
|
||||
0x1c3f5f66c, 0x0a87ab8a8, 0x12ed0daac, 0x1248ea574,
|
||||
0x065863b64, 0x08462d800, 0x11eef4f8e, 0x083348832,
|
||||
0x1ee54f54c, 0x071d111a8, 0x0b3e32c28, 0x12c743124,
|
||||
0x0064f7f26, 0x0ffd852c6, 0x0dd7e3b0c, 0x0b9e02b86,
|
||||
0x0f285651c, 0x0dcb17aa4, 0x010746f3c, 0x018b33a4e,
|
||||
0x1c24afea4, 0x0f37c5aee, 0x0271d9844, 0x1b331e26a,
|
||||
0x08e766a0c, 0x06051d5a2, 0x093a5f730, 0x17d35ba46,
|
||||
0x06cb08e5c, 0x11d5ca20e, 0x06b749fb2, 0x1bf2e8b8a,
|
||||
0x1167f94f2, 0x021f3d99c, 0x0cec3662e, 0x1a3e0968a,
|
||||
0x19329634a, 0x08f158014, 0x0e6fc4e6a, 0x0ce7f39f4,
|
||||
0x08227bb8a, 0x1a5e82106, 0x0b0cd4768, 0x061d82e56,
|
||||
0x13c2b89c4, 0x188815ab2, 0x0d7a4825c, 0x0d270f1a2,
|
||||
0x10f5ff2ba, 0x105405f3e, 0x00167d312, 0x1c3f5f66c,
|
||||
0x0f6076544, 0x0e9adf796, 0x026f6a60a, 0x12ed0daac,
|
||||
0x1a2adb74e, 0x096638b34, 0x19d34af3a, 0x065863b64,
|
||||
0x049c3cc9c, 0x1e50585a0, 0x068bce87a, 0x11eef4f8e,
|
||||
0x1524fa6c6, 0x19f1c69dc, 0x16cba8aca, 0x1ee54f54c,
|
||||
0x042d98888, 0x12913343e, 0x1329d9f7e, 0x0b3e32c28,
|
||||
0x1b1c69528, 0x088f25a3a, 0x02178513a, 0x0064f7f26,
|
||||
0x0e0ac139e, 0x04e36f0b0, 0x0170076fa, 0x0dd7e3b0c,
|
||||
0x141a1a2e2, 0x0bd6f81f8, 0x16ad828b4, 0x0f285651c,
|
||||
0x041d17b64, 0x19425cbba, 0x1fae1cc66, 0x010746f3c,
|
||||
0x1a75b4b00, 0x18db37e8a, 0x0f872e54c, 0x1c24afea4,
|
||||
0x01e41e9fc, 0x04c144932, 0x086d8e4d2, 0x0271d9844,
|
||||
0x160f7af7a, 0x052148f02, 0x05bb8f1bc, 0x08e766a0c,
|
||||
0x0a90fd27a, 0x0a3c6f37a, 0x0b3af077a, 0x093a5f730,
|
||||
0x04984d782, 0x1d22c238e, 0x0ca6ef3ac, 0x06cb08e5c,
|
||||
0x0234e0b26, 0x063ded06a, 0x1d88abd4a, 0x06b749fb2,
|
||||
0x04597456a, 0x04d56973c, 0x0e9e28eb4, 0x1167f94f2,
|
||||
0x07b3ff57a, 0x19385bf2e, 0x0c9c8b782, 0x0cec3662e,
|
||||
0x13a9cba9e, 0x0e417f38a, 0x093e106a4, 0x19329634a,
|
||||
0x167001a9c, 0x14e727980, 0x1ddffc5d4, 0x0e6fc4e6a,
|
||||
0x00df04680, 0x0d104b8fc, 0x02342001e, 0x08227bb8a,
|
||||
0x00a2a8d7e, 0x05b397730, 0x168763fa6, 0x0b0cd4768,
|
||||
0x1ed5a407a, 0x0e78eb416, 0x0d2c3ed1a, 0x13c2b89c4,
|
||||
0x0995a5724, 0x1641378f0, 0x19b1afbc4, 0x0d7a4825c,
|
||||
0x109ffedc0, 0x08d96551c, 0x0f2271e60, 0x10f5ff2ba,
|
||||
0x00b0bf8ca, 0x00bf80dd2, 0x123888b7a, 0x00167d312,
|
||||
0x1e888f7dc, 0x18dcddd1c, 0x002ee03b2, 0x0f6076544,
|
||||
0x183e8d8fe, 0x06a45d2b2, 0x133d7a042, 0x026f6a60a,
|
||||
0x116b0f50c, 0x1dd3e10e8, 0x05fabe670, 0x1a2adb74e,
|
||||
0x130004488, 0x0de87806c, 0x000bcf5f6, 0x19d34af3a,
|
||||
0x18f0c7078, 0x014338754, 0x017f27698, 0x049c3cc9c,
|
||||
0x058ca5f00, 0x15e3e77ee, 0x1af900c24, 0x068bce87a,
|
||||
0x0b5cfca28, 0x0dd07448e, 0x0ded288f8, 0x1524fa6c6,
|
||||
0x059f229bc, 0x1d8048348, 0x06d390dec, 0x16cba8aca,
|
||||
0x037170390, 0x0a3e3e02c, 0x06353c1cc, 0x042d98888,
|
||||
0x0c4584f5c, 0x0d73c7bea, 0x1f16a3418, 0x1329d9f7e,
|
||||
0x0531377e2, 0x185137662, 0x1d8d9ca7c, 0x1b1c69528,
|
||||
0x0b25b29f2, 0x18a08b5bc, 0x19fb2a8b0, 0x02178513a,
|
||||
0x1a08fe6ac, 0x1da758ae0, 0x045cddf4e, 0x0e0ac139e,
|
||||
0x1a91647f2, 0x169cf9eb0, 0x1a0f717c4, 0x0170076fa,
|
||||
};
|
||||
|
||||
// Compute the crc32c value for buffer smaller than 8
|
||||
inline void align_to_8(
|
||||
size_t len,
|
||||
uint64_t& crc0, // crc so far, updated on return
|
||||
const unsigned char*& next) { // next data pointer, updated on return
|
||||
uint32_t crc32bit = static_cast<uint32_t>(crc0);
|
||||
if (len & 0x04) {
|
||||
crc32bit = _mm_crc32_u32(crc32bit, *(uint32_t*)next);
|
||||
next += sizeof(uint32_t);
|
||||
}
|
||||
if (len & 0x02) {
|
||||
crc32bit = _mm_crc32_u16(crc32bit, *(uint16_t*)next);
|
||||
next += sizeof(uint16_t);
|
||||
}
|
||||
if (len & 0x01) {
|
||||
crc32bit = _mm_crc32_u8(crc32bit, *(next));
|
||||
next++;
|
||||
}
|
||||
crc0 = crc32bit;
|
||||
}
|
||||
|
||||
//
|
||||
// CombineCRC performs pclmulqdq multiplication of 2 partial CRC's and a well
|
||||
// chosen constant and xor's these with the remaining CRC.
|
||||
//
|
||||
inline uint64_t CombineCRC(
|
||||
size_t block_size,
|
||||
uint64_t crc0,
|
||||
uint64_t crc1,
|
||||
uint64_t crc2,
|
||||
const uint64_t* next2) {
|
||||
const auto multiplier =
|
||||
*(reinterpret_cast<const __m128i*>(clmul_constants) + block_size - 1);
|
||||
const auto crc0_xmm = _mm_set_epi64x(0, crc0);
|
||||
const auto res0 = _mm_clmulepi64_si128(crc0_xmm, multiplier, 0x00);
|
||||
const auto crc1_xmm = _mm_set_epi64x(0, crc1);
|
||||
const auto res1 = _mm_clmulepi64_si128(crc1_xmm, multiplier, 0x10);
|
||||
const auto res = _mm_xor_si128(res0, res1);
|
||||
crc0 = _mm_cvtsi128_si64(res);
|
||||
crc0 = crc0 ^ *((uint64_t*)next2 - 1);
|
||||
crc2 = _mm_crc32_u64(crc2, crc0);
|
||||
return crc2;
|
||||
}
|
||||
|
||||
// Compute CRC-32C using the Intel hardware instruction.
|
||||
uint32_t crc32c_3way(uint32_t crc, const char* buf, size_t len) {
|
||||
const unsigned char* next = (const unsigned char*)buf;
|
||||
uint64_t count;
|
||||
uint64_t crc0, crc1, crc2;
|
||||
crc0 = crc ^ 0xffffffffu;
|
||||
|
||||
|
||||
if (len >= 8) {
|
||||
// if len > 216 then align and use triplets
|
||||
if (len > 216) {
|
||||
{
|
||||
// Work on the bytes (< 8) before the first 8-byte alignment addr starts
|
||||
uint64_t align_bytes = (8 - (uintptr_t)next) & 7;
|
||||
len -= align_bytes;
|
||||
align_to_8(align_bytes, crc0, next);
|
||||
}
|
||||
|
||||
// Now work on the remaining blocks
|
||||
count = len / 24; // number of triplets
|
||||
len %= 24; // bytes remaining
|
||||
uint64_t n = count >> 7; // #blocks = first block + full blocks
|
||||
uint64_t block_size = count & 127;
|
||||
if (block_size == 0) {
|
||||
block_size = 128;
|
||||
} else {
|
||||
n++;
|
||||
}
|
||||
// points to the first byte of the next block
|
||||
const uint64_t* next0 = (uint64_t*)next + block_size;
|
||||
const uint64_t* next1 = next0 + block_size;
|
||||
const uint64_t* next2 = next1 + block_size;
|
||||
|
||||
crc1 = crc2 = 0;
|
||||
// Use Duff's device, a for() loop inside a switch()
|
||||
// statement. This needs to execute at least once, round len
|
||||
// down to nearest triplet multiple
|
||||
switch (block_size) {
|
||||
case 128:
|
||||
do {
|
||||
// jumps here for a full block of len 128
|
||||
CRCtriplet(crc, next, -128);
|
||||
case 127:
|
||||
// jumps here or below for the first block smaller
|
||||
CRCtriplet(crc, next, -127);
|
||||
case 126:
|
||||
CRCtriplet(crc, next, -126); // than 128
|
||||
case 125:
|
||||
CRCtriplet(crc, next, -125);
|
||||
case 124:
|
||||
CRCtriplet(crc, next, -124);
|
||||
case 123:
|
||||
CRCtriplet(crc, next, -123);
|
||||
case 122:
|
||||
CRCtriplet(crc, next, -122);
|
||||
case 121:
|
||||
CRCtriplet(crc, next, -121);
|
||||
case 120:
|
||||
CRCtriplet(crc, next, -120);
|
||||
case 119:
|
||||
CRCtriplet(crc, next, -119);
|
||||
case 118:
|
||||
CRCtriplet(crc, next, -118);
|
||||
case 117:
|
||||
CRCtriplet(crc, next, -117);
|
||||
case 116:
|
||||
CRCtriplet(crc, next, -116);
|
||||
case 115:
|
||||
CRCtriplet(crc, next, -115);
|
||||
case 114:
|
||||
CRCtriplet(crc, next, -114);
|
||||
case 113:
|
||||
CRCtriplet(crc, next, -113);
|
||||
case 112:
|
||||
CRCtriplet(crc, next, -112);
|
||||
case 111:
|
||||
CRCtriplet(crc, next, -111);
|
||||
case 110:
|
||||
CRCtriplet(crc, next, -110);
|
||||
case 109:
|
||||
CRCtriplet(crc, next, -109);
|
||||
case 108:
|
||||
CRCtriplet(crc, next, -108);
|
||||
case 107:
|
||||
CRCtriplet(crc, next, -107);
|
||||
case 106:
|
||||
CRCtriplet(crc, next, -106);
|
||||
case 105:
|
||||
CRCtriplet(crc, next, -105);
|
||||
case 104:
|
||||
CRCtriplet(crc, next, -104);
|
||||
case 103:
|
||||
CRCtriplet(crc, next, -103);
|
||||
case 102:
|
||||
CRCtriplet(crc, next, -102);
|
||||
case 101:
|
||||
CRCtriplet(crc, next, -101);
|
||||
case 100:
|
||||
CRCtriplet(crc, next, -100);
|
||||
case 99:
|
||||
CRCtriplet(crc, next, -99);
|
||||
case 98:
|
||||
CRCtriplet(crc, next, -98);
|
||||
case 97:
|
||||
CRCtriplet(crc, next, -97);
|
||||
case 96:
|
||||
CRCtriplet(crc, next, -96);
|
||||
case 95:
|
||||
CRCtriplet(crc, next, -95);
|
||||
case 94:
|
||||
CRCtriplet(crc, next, -94);
|
||||
case 93:
|
||||
CRCtriplet(crc, next, -93);
|
||||
case 92:
|
||||
CRCtriplet(crc, next, -92);
|
||||
case 91:
|
||||
CRCtriplet(crc, next, -91);
|
||||
case 90:
|
||||
CRCtriplet(crc, next, -90);
|
||||
case 89:
|
||||
CRCtriplet(crc, next, -89);
|
||||
case 88:
|
||||
CRCtriplet(crc, next, -88);
|
||||
case 87:
|
||||
CRCtriplet(crc, next, -87);
|
||||
case 86:
|
||||
CRCtriplet(crc, next, -86);
|
||||
case 85:
|
||||
CRCtriplet(crc, next, -85);
|
||||
case 84:
|
||||
CRCtriplet(crc, next, -84);
|
||||
case 83:
|
||||
CRCtriplet(crc, next, -83);
|
||||
case 82:
|
||||
CRCtriplet(crc, next, -82);
|
||||
case 81:
|
||||
CRCtriplet(crc, next, -81);
|
||||
case 80:
|
||||
CRCtriplet(crc, next, -80);
|
||||
case 79:
|
||||
CRCtriplet(crc, next, -79);
|
||||
case 78:
|
||||
CRCtriplet(crc, next, -78);
|
||||
case 77:
|
||||
CRCtriplet(crc, next, -77);
|
||||
case 76:
|
||||
CRCtriplet(crc, next, -76);
|
||||
case 75:
|
||||
CRCtriplet(crc, next, -75);
|
||||
case 74:
|
||||
CRCtriplet(crc, next, -74);
|
||||
case 73:
|
||||
CRCtriplet(crc, next, -73);
|
||||
case 72:
|
||||
CRCtriplet(crc, next, -72);
|
||||
case 71:
|
||||
CRCtriplet(crc, next, -71);
|
||||
case 70:
|
||||
CRCtriplet(crc, next, -70);
|
||||
case 69:
|
||||
CRCtriplet(crc, next, -69);
|
||||
case 68:
|
||||
CRCtriplet(crc, next, -68);
|
||||
case 67:
|
||||
CRCtriplet(crc, next, -67);
|
||||
case 66:
|
||||
CRCtriplet(crc, next, -66);
|
||||
case 65:
|
||||
CRCtriplet(crc, next, -65);
|
||||
case 64:
|
||||
CRCtriplet(crc, next, -64);
|
||||
case 63:
|
||||
CRCtriplet(crc, next, -63);
|
||||
case 62:
|
||||
CRCtriplet(crc, next, -62);
|
||||
case 61:
|
||||
CRCtriplet(crc, next, -61);
|
||||
case 60:
|
||||
CRCtriplet(crc, next, -60);
|
||||
case 59:
|
||||
CRCtriplet(crc, next, -59);
|
||||
case 58:
|
||||
CRCtriplet(crc, next, -58);
|
||||
case 57:
|
||||
CRCtriplet(crc, next, -57);
|
||||
case 56:
|
||||
CRCtriplet(crc, next, -56);
|
||||
case 55:
|
||||
CRCtriplet(crc, next, -55);
|
||||
case 54:
|
||||
CRCtriplet(crc, next, -54);
|
||||
case 53:
|
||||
CRCtriplet(crc, next, -53);
|
||||
case 52:
|
||||
CRCtriplet(crc, next, -52);
|
||||
case 51:
|
||||
CRCtriplet(crc, next, -51);
|
||||
case 50:
|
||||
CRCtriplet(crc, next, -50);
|
||||
case 49:
|
||||
CRCtriplet(crc, next, -49);
|
||||
case 48:
|
||||
CRCtriplet(crc, next, -48);
|
||||
case 47:
|
||||
CRCtriplet(crc, next, -47);
|
||||
case 46:
|
||||
CRCtriplet(crc, next, -46);
|
||||
case 45:
|
||||
CRCtriplet(crc, next, -45);
|
||||
case 44:
|
||||
CRCtriplet(crc, next, -44);
|
||||
case 43:
|
||||
CRCtriplet(crc, next, -43);
|
||||
case 42:
|
||||
CRCtriplet(crc, next, -42);
|
||||
case 41:
|
||||
CRCtriplet(crc, next, -41);
|
||||
case 40:
|
||||
CRCtriplet(crc, next, -40);
|
||||
case 39:
|
||||
CRCtriplet(crc, next, -39);
|
||||
case 38:
|
||||
CRCtriplet(crc, next, -38);
|
||||
case 37:
|
||||
CRCtriplet(crc, next, -37);
|
||||
case 36:
|
||||
CRCtriplet(crc, next, -36);
|
||||
case 35:
|
||||
CRCtriplet(crc, next, -35);
|
||||
case 34:
|
||||
CRCtriplet(crc, next, -34);
|
||||
case 33:
|
||||
CRCtriplet(crc, next, -33);
|
||||
case 32:
|
||||
CRCtriplet(crc, next, -32);
|
||||
case 31:
|
||||
CRCtriplet(crc, next, -31);
|
||||
case 30:
|
||||
CRCtriplet(crc, next, -30);
|
||||
case 29:
|
||||
CRCtriplet(crc, next, -29);
|
||||
case 28:
|
||||
CRCtriplet(crc, next, -28);
|
||||
case 27:
|
||||
CRCtriplet(crc, next, -27);
|
||||
case 26:
|
||||
CRCtriplet(crc, next, -26);
|
||||
case 25:
|
||||
CRCtriplet(crc, next, -25);
|
||||
case 24:
|
||||
CRCtriplet(crc, next, -24);
|
||||
case 23:
|
||||
CRCtriplet(crc, next, -23);
|
||||
case 22:
|
||||
CRCtriplet(crc, next, -22);
|
||||
case 21:
|
||||
CRCtriplet(crc, next, -21);
|
||||
case 20:
|
||||
CRCtriplet(crc, next, -20);
|
||||
case 19:
|
||||
CRCtriplet(crc, next, -19);
|
||||
case 18:
|
||||
CRCtriplet(crc, next, -18);
|
||||
case 17:
|
||||
CRCtriplet(crc, next, -17);
|
||||
case 16:
|
||||
CRCtriplet(crc, next, -16);
|
||||
case 15:
|
||||
CRCtriplet(crc, next, -15);
|
||||
case 14:
|
||||
CRCtriplet(crc, next, -14);
|
||||
case 13:
|
||||
CRCtriplet(crc, next, -13);
|
||||
case 12:
|
||||
CRCtriplet(crc, next, -12);
|
||||
case 11:
|
||||
CRCtriplet(crc, next, -11);
|
||||
case 10:
|
||||
CRCtriplet(crc, next, -10);
|
||||
case 9:
|
||||
CRCtriplet(crc, next, -9);
|
||||
case 8:
|
||||
CRCtriplet(crc, next, -8);
|
||||
case 7:
|
||||
CRCtriplet(crc, next, -7);
|
||||
case 6:
|
||||
CRCtriplet(crc, next, -6);
|
||||
case 5:
|
||||
CRCtriplet(crc, next, -5);
|
||||
case 4:
|
||||
CRCtriplet(crc, next, -4);
|
||||
case 3:
|
||||
CRCtriplet(crc, next, -3);
|
||||
case 2:
|
||||
CRCtriplet(crc, next, -2);
|
||||
case 1:
|
||||
CRCduplet(crc, next, -1); // the final triplet is actually only 2
|
||||
//{ CombineCRC(); }
|
||||
crc0 = CombineCRC(block_size, crc0, crc1, crc2, next2);
|
||||
if (--n > 0) {
|
||||
crc1 = crc2 = 0;
|
||||
block_size = 128;
|
||||
// points to the first byte of the next block
|
||||
next0 = next2 + 128;
|
||||
next1 = next0 + 128; // from here on all blocks are 128 long
|
||||
next2 = next1 + 128;
|
||||
}
|
||||
case 0:;
|
||||
} while (n > 0);
|
||||
}
|
||||
next = (const unsigned char*)next2;
|
||||
}
|
||||
uint64_t count2 = len >> 3; // 216 of less bytes is 27 or less singlets
|
||||
len = len & 7;
|
||||
next += (count2 * 8);
|
||||
switch (count2) {
|
||||
case 27:
|
||||
CRCsinglet(crc0, next, -27 * 8);
|
||||
case 26:
|
||||
CRCsinglet(crc0, next, -26 * 8);
|
||||
case 25:
|
||||
CRCsinglet(crc0, next, -25 * 8);
|
||||
case 24:
|
||||
CRCsinglet(crc0, next, -24 * 8);
|
||||
case 23:
|
||||
CRCsinglet(crc0, next, -23 * 8);
|
||||
case 22:
|
||||
CRCsinglet(crc0, next, -22 * 8);
|
||||
case 21:
|
||||
CRCsinglet(crc0, next, -21 * 8);
|
||||
case 20:
|
||||
CRCsinglet(crc0, next, -20 * 8);
|
||||
case 19:
|
||||
CRCsinglet(crc0, next, -19 * 8);
|
||||
case 18:
|
||||
CRCsinglet(crc0, next, -18 * 8);
|
||||
case 17:
|
||||
CRCsinglet(crc0, next, -17 * 8);
|
||||
case 16:
|
||||
CRCsinglet(crc0, next, -16 * 8);
|
||||
case 15:
|
||||
CRCsinglet(crc0, next, -15 * 8);
|
||||
case 14:
|
||||
CRCsinglet(crc0, next, -14 * 8);
|
||||
case 13:
|
||||
CRCsinglet(crc0, next, -13 * 8);
|
||||
case 12:
|
||||
CRCsinglet(crc0, next, -12 * 8);
|
||||
case 11:
|
||||
CRCsinglet(crc0, next, -11 * 8);
|
||||
case 10:
|
||||
CRCsinglet(crc0, next, -10 * 8);
|
||||
case 9:
|
||||
CRCsinglet(crc0, next, -9 * 8);
|
||||
case 8:
|
||||
CRCsinglet(crc0, next, -8 * 8);
|
||||
case 7:
|
||||
CRCsinglet(crc0, next, -7 * 8);
|
||||
case 6:
|
||||
CRCsinglet(crc0, next, -6 * 8);
|
||||
case 5:
|
||||
CRCsinglet(crc0, next, -5 * 8);
|
||||
case 4:
|
||||
CRCsinglet(crc0, next, -4 * 8);
|
||||
case 3:
|
||||
CRCsinglet(crc0, next, -3 * 8);
|
||||
case 2:
|
||||
CRCsinglet(crc0, next, -2 * 8);
|
||||
case 1:
|
||||
CRCsinglet(crc0, next, -1 * 8);
|
||||
case 0:;
|
||||
}
|
||||
}
|
||||
{
|
||||
align_to_8(len, crc0, next);
|
||||
return (uint32_t)crc0 ^ 0xffffffffu;
|
||||
}
|
||||
}
|
||||
|
||||
#endif //HAVE_SSE42 && HAVE_PCLMUL
|
||||
|
||||
static inline Function Choose_Extend() {
|
||||
#ifndef HAVE_POWER8
|
||||
if (isSSE42()) {
|
||||
if (isPCLMULQDQ()) {
|
||||
#if defined HAVE_SSE42 && defined HAVE_PCLMUL && !defined NO_THREEWAY_CRC32C
|
||||
return crc32c_3way;
|
||||
#else
|
||||
return ExtendImpl<Fast_CRC32>; // Fast_CRC32 will check HAVE_SSE42 itself
|
||||
#endif
|
||||
}
|
||||
else { // no runtime PCLMULQDQ support but has SSE42 support
|
||||
return ExtendImpl<Fast_CRC32>;
|
||||
}
|
||||
} // end of isSSE42()
|
||||
else {
|
||||
return ExtendImpl<Slow_CRC32>;
|
||||
}
|
||||
#else //HAVE_POWER8
|
||||
return isAltiVec() ? ExtendPPCImpl : ExtendImpl<Slow_CRC32>;
|
||||
#endif
|
||||
}
|
||||
|
||||
static Function ChosenExtend = Choose_Extend();
|
||||
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
|
||||
return ChosenExtend(crc, buf, size);
|
||||
}
|
||||
|
||||
|
||||
} // namespace crc32c
|
||||
} // namespace rocksdb
|
||||
|
@ -6,7 +6,6 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "util/crc32c.h"
|
||||
#include "util/testharness.h"
|
||||
|
||||
@ -15,7 +14,57 @@ namespace crc32c {
|
||||
|
||||
class CRC { };
|
||||
|
||||
|
||||
// Tests for 3-way crc32c algorithm. We need these tests because it uses
|
||||
// different lookup tables than the original Fast_CRC32
|
||||
const unsigned int BUFFER_SIZE = 512 * 1024 * sizeof(uint64_t);
|
||||
char buffer[BUFFER_SIZE];
|
||||
|
||||
struct ExpectedResult {
|
||||
size_t offset;
|
||||
size_t length;
|
||||
uint32_t crc32c;
|
||||
};
|
||||
|
||||
ExpectedResult expectedResults[] = {
|
||||
// Zero-byte input
|
||||
{ 0, 0, ~0U },
|
||||
// Small aligned inputs to test special cases in SIMD implementations
|
||||
{ 8, 1, 1543413366 },
|
||||
{ 8, 2, 523493126 },
|
||||
{ 8, 3, 1560427360 },
|
||||
{ 8, 4, 3422504776 },
|
||||
{ 8, 5, 447841138 },
|
||||
{ 8, 6, 3910050499 },
|
||||
{ 8, 7, 3346241981 },
|
||||
// Small unaligned inputs
|
||||
{ 9, 1, 3855826643 },
|
||||
{ 10, 2, 560880875 },
|
||||
{ 11, 3, 1479707779 },
|
||||
{ 12, 4, 2237687071 },
|
||||
{ 13, 5, 4063855784 },
|
||||
{ 14, 6, 2553454047 },
|
||||
{ 15, 7, 1349220140 },
|
||||
// Larger inputs to test leftover chunks at the end of aligned blocks
|
||||
{ 8, 8, 627613930 },
|
||||
{ 8, 9, 2105929409 },
|
||||
{ 8, 10, 2447068514 },
|
||||
{ 8, 11, 863807079 },
|
||||
{ 8, 12, 292050879 },
|
||||
{ 8, 13, 1411837737 },
|
||||
{ 8, 14, 2614515001 },
|
||||
{ 8, 15, 3579076296 },
|
||||
{ 8, 16, 2897079161 },
|
||||
{ 8, 17, 675168386 },
|
||||
// // Much larger inputs
|
||||
{ 0, BUFFER_SIZE, 2096790750 },
|
||||
{ 1, BUFFER_SIZE / 2, 3854797577 },
|
||||
|
||||
};
|
||||
|
||||
TEST(CRC, StandardResults) {
|
||||
|
||||
// Original Fast_CRC32 tests.
|
||||
// From rfc3720 section B.4.
|
||||
char buf[32];
|
||||
|
||||
@ -50,6 +99,24 @@ TEST(CRC, StandardResults) {
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
ASSERT_EQ(0xd9963a56, Value(reinterpret_cast<char*>(data), sizeof(data)));
|
||||
|
||||
// 3-Way Crc32c tests ported from folly.
|
||||
// Test 1: single computation
|
||||
for (auto expected : expectedResults) {
|
||||
uint32_t result = Value(buffer + expected.offset, expected.length);
|
||||
EXPECT_EQ(~expected.crc32c, result);
|
||||
}
|
||||
|
||||
// Test 2: stitching two computations
|
||||
for (auto expected : expectedResults) {
|
||||
size_t partialLength = expected.length / 2;
|
||||
uint32_t partialChecksum = Value(buffer + expected.offset, partialLength);
|
||||
uint32_t result = Extend(partialChecksum,
|
||||
buffer + expected.offset + partialLength,
|
||||
expected.length - partialLength);
|
||||
EXPECT_EQ(~expected.crc32c, result);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
TEST(CRC, Values) {
|
||||
@ -72,7 +139,36 @@ TEST(CRC, Mask) {
|
||||
} // namespace crc32c
|
||||
} // namespace rocksdb
|
||||
|
||||
// copied from folly
|
||||
const uint64_t FNV_64_HASH_START = 14695981039346656037ULL;
|
||||
inline uint64_t fnv64_buf(const void* buf,
|
||||
size_t n,
|
||||
uint64_t hash = FNV_64_HASH_START) {
|
||||
// forcing signed char, since other platforms can use unsigned
|
||||
const signed char* char_buf = reinterpret_cast<const signed char*>(buf);
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) +
|
||||
(hash << 8) + (hash << 40);
|
||||
hash ^= char_buf[i];
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
|
||||
// Populate a buffer with a deterministic pattern
|
||||
// on which to compute checksums
|
||||
|
||||
const uint8_t* src = (uint8_t*)rocksdb::crc32c::buffer;
|
||||
uint64_t* dst = (uint64_t*)rocksdb::crc32c::buffer;
|
||||
const uint64_t* end = (const uint64_t*)(rocksdb::crc32c::buffer + rocksdb::crc32c::BUFFER_SIZE);
|
||||
*dst++ = 0;
|
||||
while (dst < end) {
|
||||
*dst++ = fnv64_buf((const char*)src, sizeof(uint64_t));
|
||||
src += sizeof(uint64_t);
|
||||
}
|
||||
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
@ -124,7 +124,7 @@ bool RandomTransactionInserter::DoInsert(DB* db, Transaction* txn,
|
||||
bool unexpected_error = false;
|
||||
|
||||
std::vector<uint16_t> set_vec(num_sets_);
|
||||
std::iota(set_vec.begin(), set_vec.end(), 0);
|
||||
std::iota(set_vec.begin(), set_vec.end(), static_cast<uint16_t>(0));
|
||||
std::random_shuffle(set_vec.begin(), set_vec.end(),
|
||||
[&](uint64_t r) { return rand_->Uniform(r); });
|
||||
// For each set, pick a key at random and increment it
|
||||
@ -254,7 +254,7 @@ Status RandomTransactionInserter::Verify(DB* db, uint16_t num_sets,
|
||||
}
|
||||
|
||||
std::vector<uint16_t> set_vec(num_sets);
|
||||
std::iota(set_vec.begin(), set_vec.end(), 0);
|
||||
std::iota(set_vec.begin(), set_vec.end(), static_cast<uint16_t>(0));
|
||||
if (rand) {
|
||||
std::random_shuffle(set_vec.begin(), set_vec.end(),
|
||||
[&](uint64_t r) { return rand->Uniform(r); });
|
||||
|
@ -73,6 +73,8 @@ void BlobDBOptions::Dump(Logger* log) const {
|
||||
blob_dir_size);
|
||||
ROCKS_LOG_HEADER(log, " blob_db_options.ttl_range_secs: %" PRIu32,
|
||||
ttl_range_secs);
|
||||
ROCKS_LOG_HEADER(log, " blob_db_options.min_blob_size: %" PRIu64,
|
||||
min_blob_size);
|
||||
ROCKS_LOG_HEADER(log, " blob_db_options.bytes_per_sync: %" PRIu64,
|
||||
bytes_per_sync);
|
||||
ROCKS_LOG_HEADER(log, " blob_db_options.blob_file_size: %" PRIu64,
|
||||
|
@ -56,8 +56,10 @@ struct BlobDBOptions {
|
||||
// will be inlined in base DB together with the key.
|
||||
uint64_t min_blob_size = 0;
|
||||
|
||||
// at what bytes will the blob files be synced to blob log.
|
||||
uint64_t bytes_per_sync = 0;
|
||||
// Allows OS to incrementally sync blob files to disk for every
|
||||
// bytes_per_sync bytes written. Users shouldn't rely on it for
|
||||
// persistency guarantee.
|
||||
uint64_t bytes_per_sync = 512 * 1024;
|
||||
|
||||
// the target size of each blob file. File will become immutable
|
||||
// after it exceeds that size
|
||||
@ -76,6 +78,13 @@ struct BlobDBOptions {
|
||||
// blob files will be cleanup based on TTL.
|
||||
bool enable_garbage_collection = false;
|
||||
|
||||
// Time interval to trigger garbage collection, in seconds.
|
||||
uint64_t garbage_collection_interval_secs = 60;
|
||||
|
||||
// If garbage collection is enabled, blob files with deleted size no less
|
||||
// than this ratio will become candidates to be cleanup.
|
||||
double garbage_collection_deletion_size_threshold = 0.75;
|
||||
|
||||
// Disable all background job. Used for test only.
|
||||
bool disable_background_tasks = false;
|
||||
|
||||
@ -203,6 +212,8 @@ class BlobDB : public StackableDB {
|
||||
|
||||
virtual BlobDBOptions GetBlobDBOptions() const = 0;
|
||||
|
||||
virtual Status SyncBlobFiles() = 0;
|
||||
|
||||
virtual ~BlobDB() {}
|
||||
|
||||
protected:
|
||||
|
@ -111,7 +111,6 @@ BlobDBImpl::BlobDBImpl(const std::string& dbname,
|
||||
cf_options_(cf_options),
|
||||
env_options_(db_options),
|
||||
statistics_(db_options_.statistics.get()),
|
||||
dir_change_(false),
|
||||
next_file_number_(1),
|
||||
epoch_of_(0),
|
||||
shutdown_(false),
|
||||
@ -124,6 +123,7 @@ BlobDBImpl::BlobDBImpl(const std::string& dbname,
|
||||
blob_dir_ = (bdb_options_.path_relative)
|
||||
? dbname + "/" + bdb_options_.blob_dir
|
||||
: bdb_options_.blob_dir;
|
||||
env_options_.bytes_per_sync = blob_db_options.bytes_per_sync;
|
||||
}
|
||||
|
||||
BlobDBImpl::~BlobDBImpl() {
|
||||
@ -202,6 +202,7 @@ Status BlobDBImpl::Open(std::vector<ColumnFamilyHandle*>* handles) {
|
||||
}
|
||||
|
||||
ROCKS_LOG_INFO(db_options_.info_log, "BlobDB pointer %p", this);
|
||||
bdb_options_.Dump(db_options_.info_log.get());
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -210,7 +211,8 @@ void BlobDBImpl::StartBackgroundTasks() {
|
||||
tqueue_.add(
|
||||
kReclaimOpenFilesPeriodMillisecs,
|
||||
std::bind(&BlobDBImpl::ReclaimOpenFiles, this, std::placeholders::_1));
|
||||
tqueue_.add(kGCCheckPeriodMillisecs,
|
||||
tqueue_.add(static_cast<int64_t>(
|
||||
bdb_options_.garbage_collection_interval_secs * 1000),
|
||||
std::bind(&BlobDBImpl::RunGC, this, std::placeholders::_1));
|
||||
if (bdb_options_.enable_garbage_collection) {
|
||||
tqueue_.add(
|
||||
@ -225,8 +227,6 @@ void BlobDBImpl::StartBackgroundTasks() {
|
||||
std::bind(&BlobDBImpl::DeleteObsoleteFiles, this, std::placeholders::_1));
|
||||
tqueue_.add(kSanityCheckPeriodMillisecs,
|
||||
std::bind(&BlobDBImpl::SanityCheck, this, std::placeholders::_1));
|
||||
tqueue_.add(kFSyncFilesPeriodMillisecs,
|
||||
std::bind(&BlobDBImpl::FsyncFiles, this, std::placeholders::_1));
|
||||
tqueue_.add(
|
||||
kCheckSeqFilesPeriodMillisecs,
|
||||
std::bind(&BlobDBImpl::CheckSeqFiles, this, std::placeholders::_1));
|
||||
@ -472,7 +472,6 @@ std::shared_ptr<BlobFile> BlobDBImpl::SelectBlobFile() {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
dir_change_.store(true);
|
||||
blob_files_.insert(std::make_pair(bfile->BlobFileNumber(), bfile));
|
||||
open_non_ttl_file_ = bfile;
|
||||
return bfile;
|
||||
@ -547,7 +546,6 @@ std::shared_ptr<BlobFile> BlobDBImpl::SelectBlobFileTTL(uint64_t expiration) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
dir_change_.store(true);
|
||||
blob_files_.insert(std::make_pair(bfile->BlobFileNumber(), bfile));
|
||||
open_ttl_files_.insert(bfile);
|
||||
epoch_of_++;
|
||||
@ -571,18 +569,14 @@ class BlobDBImpl::BlobInserter : public WriteBatch::Handler {
|
||||
const WriteOptions& options_;
|
||||
BlobDBImpl* blob_db_impl_;
|
||||
uint32_t default_cf_id_;
|
||||
SequenceNumber sequence_;
|
||||
WriteBatch batch_;
|
||||
|
||||
public:
|
||||
BlobInserter(const WriteOptions& options, BlobDBImpl* blob_db_impl,
|
||||
uint32_t default_cf_id, SequenceNumber seq)
|
||||
uint32_t default_cf_id)
|
||||
: options_(options),
|
||||
blob_db_impl_(blob_db_impl),
|
||||
default_cf_id_(default_cf_id),
|
||||
sequence_(seq) {}
|
||||
|
||||
SequenceNumber sequence() { return sequence_; }
|
||||
default_cf_id_(default_cf_id) {}
|
||||
|
||||
WriteBatch* batch() { return &batch_; }
|
||||
|
||||
@ -597,8 +591,7 @@ class BlobDBImpl::BlobInserter : public WriteBatch::Handler {
|
||||
uint64_t expiration =
|
||||
blob_db_impl_->ExtractExpiration(key, value, &value_slice, &new_value);
|
||||
Status s = blob_db_impl_->PutBlobValue(options_, key, value_slice,
|
||||
expiration, sequence_, &batch_);
|
||||
sequence_++;
|
||||
expiration, &batch_);
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -609,7 +602,6 @@ class BlobDBImpl::BlobInserter : public WriteBatch::Handler {
|
||||
"Blob DB doesn't support non-default column family.");
|
||||
}
|
||||
Status s = WriteBatchInternal::Delete(&batch_, column_family_id, key);
|
||||
sequence_++;
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -621,7 +613,6 @@ class BlobDBImpl::BlobInserter : public WriteBatch::Handler {
|
||||
}
|
||||
Status s = WriteBatchInternal::DeleteRange(&batch_, column_family_id,
|
||||
begin_key, end_key);
|
||||
sequence_++;
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -643,12 +634,8 @@ Status BlobDBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
|
||||
RecordTick(statistics_, BLOB_DB_NUM_WRITE);
|
||||
uint32_t default_cf_id =
|
||||
reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily())->GetID();
|
||||
// TODO(yiwu): In case there are multiple writers the latest sequence would
|
||||
// not be the actually sequence we are writting. Need to get the sequence
|
||||
// from write batch after DB write instead.
|
||||
SequenceNumber current_seq = GetLatestSequenceNumber() + 1;
|
||||
Status s;
|
||||
BlobInserter blob_inserter(options, this, default_cf_id, current_seq);
|
||||
BlobInserter blob_inserter(options, this, default_cf_id);
|
||||
{
|
||||
// Release write_mutex_ before DB write to avoid race condition with
|
||||
// flush begin listener, which also require write_mutex_ to sync
|
||||
@ -693,6 +680,8 @@ Status BlobDBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
|
||||
|
||||
if (bdb_options_.enable_garbage_collection) {
|
||||
// add deleted key to list of keys that have been deleted for book-keeping
|
||||
SequenceNumber current_seq =
|
||||
WriteBatchInternal::Sequence(blob_inserter.batch());
|
||||
DeleteBookkeeper delete_bookkeeper(this, current_seq);
|
||||
s = updates->Iterate(&delete_bookkeeper);
|
||||
}
|
||||
@ -761,11 +750,7 @@ Status BlobDBImpl::PutUntil(const WriteOptions& options, const Slice& key,
|
||||
// flush begin listener, which also require write_mutex_ to sync
|
||||
// blob files.
|
||||
MutexLock l(&write_mutex_);
|
||||
// TODO(yiwu): In case there are multiple writers the latest sequence would
|
||||
// not be the actually sequence we are writting. Need to get the sequence
|
||||
// from write batch after DB write instead.
|
||||
SequenceNumber sequence = GetLatestSequenceNumber() + 1;
|
||||
s = PutBlobValue(options, key, value, expiration, sequence, &batch);
|
||||
s = PutBlobValue(options, key, value, expiration, &batch);
|
||||
}
|
||||
if (s.ok()) {
|
||||
s = db_->Write(options, &batch);
|
||||
@ -776,7 +761,7 @@ Status BlobDBImpl::PutUntil(const WriteOptions& options, const Slice& key,
|
||||
|
||||
Status BlobDBImpl::PutBlobValue(const WriteOptions& options, const Slice& key,
|
||||
const Slice& value, uint64_t expiration,
|
||||
SequenceNumber sequence, WriteBatch* batch) {
|
||||
WriteBatch* batch) {
|
||||
Status s;
|
||||
std::string index_entry;
|
||||
uint32_t column_family_id =
|
||||
@ -817,7 +802,6 @@ Status BlobDBImpl::PutBlobValue(const WriteOptions& options, const Slice& key,
|
||||
}
|
||||
|
||||
if (s.ok()) {
|
||||
bfile->ExtendSequenceRange(sequence);
|
||||
if (expiration != kNoExpiration) {
|
||||
bfile->ExtendExpirationRange(expiration);
|
||||
}
|
||||
@ -876,6 +860,9 @@ std::shared_ptr<BlobFile> BlobDBImpl::GetOldestBlobFile() {
|
||||
CopyBlobFiles(&blob_files, [](const std::shared_ptr<BlobFile>& f) {
|
||||
return !f->Obsolete() && f->Immutable();
|
||||
});
|
||||
if (blob_files.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
blobf_compare_ttl compare;
|
||||
return *std::min_element(blob_files.begin(), blob_files.end(), compare);
|
||||
}
|
||||
@ -898,7 +885,7 @@ bool BlobDBImpl::EvictOldestBlobFile() {
|
||||
total_blob_space_.load(), bdb_options_.blob_dir_size,
|
||||
oldest_file->BlobFileNumber(), expiration_range.first,
|
||||
expiration_range.second);
|
||||
oldest_file->MarkObsolete(oldest_file->GetSequenceRange().second);
|
||||
oldest_file->MarkObsolete(GetLatestSequenceNumber());
|
||||
obsolete_files_.push_back(oldest_file);
|
||||
oldest_file_evicted_.store(true);
|
||||
RecordTick(statistics_, BLOB_DB_FIFO_NUM_FILES_EVICTED);
|
||||
@ -906,6 +893,7 @@ bool BlobDBImpl::EvictOldestBlobFile() {
|
||||
oldest_file->BlobCount());
|
||||
RecordTick(statistics_, BLOB_DB_FIFO_BYTES_EVICTED,
|
||||
oldest_file->GetFileSize());
|
||||
TEST_SYNC_POINT("BlobDBImpl::EvictOldestBlobFile:Evicted");
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1072,58 +1060,58 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
|
||||
std::shared_ptr<RandomAccessFileReader> reader =
|
||||
GetOrOpenRandomAccessReader(bfile, env_, env_options_);
|
||||
|
||||
std::string* valueptr = value->GetSelf();
|
||||
std::string value_c;
|
||||
if (bdb_options_.compression != kNoCompression) {
|
||||
valueptr = &value_c;
|
||||
}
|
||||
assert(blob_index.offset() > key.size() + sizeof(uint32_t));
|
||||
uint64_t record_offset = blob_index.offset() - key.size() - sizeof(uint32_t);
|
||||
uint64_t record_size = sizeof(uint32_t) + key.size() + blob_index.size();
|
||||
|
||||
// Allocate the buffer. This is safe in C++11
|
||||
// Note that std::string::reserved() does not work, since previous value
|
||||
// of the buffer can be larger than blob_index.size().
|
||||
valueptr->resize(blob_index.size());
|
||||
char* buffer = &(*valueptr)[0];
|
||||
std::string buffer_str(record_size, static_cast<char>(0));
|
||||
char* buffer = &buffer_str[0];
|
||||
|
||||
Slice blob_value;
|
||||
// A partial blob record contain checksum, key and value.
|
||||
Slice blob_record;
|
||||
{
|
||||
StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
|
||||
s = reader->Read(blob_index.offset(), blob_index.size(), &blob_value,
|
||||
buffer);
|
||||
RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_value.size());
|
||||
s = reader->Read(record_offset, record_size, &blob_record, buffer);
|
||||
RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_record.size());
|
||||
}
|
||||
if (!s.ok() || blob_value.size() != blob_index.size()) {
|
||||
if (debug_level_ >= 2) {
|
||||
ROCKS_LOG_ERROR(db_options_.info_log,
|
||||
"Failed to read blob from file: %s blob_offset: %" PRIu64
|
||||
" blob_size: %" PRIu64 " read: %d key: %s status: '%s'",
|
||||
bfile->PathName().c_str(), blob_index.offset(),
|
||||
blob_index.size(), static_cast<int>(blob_value.size()),
|
||||
key.data(), s.ToString().c_str());
|
||||
}
|
||||
return Status::NotFound("Blob Not Found as couldnt retrieve Blob");
|
||||
if (!s.ok()) {
|
||||
ROCKS_LOG_DEBUG(db_options_.info_log,
|
||||
"Failed to read blob from blob file %" PRIu64
|
||||
", blob_offset: %" PRIu64 ", blob_size: %" PRIu64
|
||||
", key_size: " PRIu64 ", read " PRIu64
|
||||
"bytes, status: '%s'",
|
||||
bfile->BlobFileNumber(), blob_index.offset(),
|
||||
blob_index.size(), key.size(), s.ToString().c_str());
|
||||
return s;
|
||||
}
|
||||
if (blob_record.size() != record_size) {
|
||||
ROCKS_LOG_DEBUG(db_options_.info_log,
|
||||
"Failed to read blob from blob file %" PRIu64
|
||||
", blob_offset: %" PRIu64 ", blob_size: %" PRIu64
|
||||
", key_size: " PRIu64 ", read " PRIu64
|
||||
"bytes, status: '%s'",
|
||||
bfile->BlobFileNumber(), blob_index.offset(),
|
||||
blob_index.size(), key.size(), s.ToString().c_str());
|
||||
|
||||
// TODO(yiwu): Add an option to skip crc checking.
|
||||
Slice crc_slice;
|
||||
return Status::Corruption("Failed to retrieve blob from blob index.");
|
||||
}
|
||||
Slice crc_slice(blob_record.data(), sizeof(uint32_t));
|
||||
Slice blob_value(blob_record.data() + sizeof(uint32_t) + key.size(),
|
||||
blob_index.size());
|
||||
uint32_t crc_exp;
|
||||
std::string crc_str;
|
||||
crc_str.resize(sizeof(uint32_t));
|
||||
char* crc_buffer = &(crc_str[0]);
|
||||
s = reader->Read(blob_index.offset() - (key.size() + sizeof(uint32_t)),
|
||||
sizeof(uint32_t), &crc_slice, crc_buffer);
|
||||
if (!s.ok() || !GetFixed32(&crc_slice, &crc_exp)) {
|
||||
if (debug_level_ >= 2) {
|
||||
ROCKS_LOG_ERROR(db_options_.info_log,
|
||||
"Failed to fetch blob crc file: %s blob_offset: %" PRIu64
|
||||
" blob_size: %" PRIu64 " key: %s status: '%s'",
|
||||
bfile->PathName().c_str(), blob_index.offset(),
|
||||
blob_index.size(), key.data(), s.ToString().c_str());
|
||||
}
|
||||
return Status::NotFound("Blob Not Found as couldnt retrieve CRC");
|
||||
if (!GetFixed32(&crc_slice, &crc_exp)) {
|
||||
ROCKS_LOG_DEBUG(db_options_.info_log,
|
||||
"Unable to decode CRC from blob file %" PRIu64
|
||||
", blob_offset: %" PRIu64 ", blob_size: %" PRIu64
|
||||
", key size: %" PRIu64 ", status: '%s'",
|
||||
bfile->BlobFileNumber(), blob_index.offset(),
|
||||
blob_index.size(), key.size(), s.ToString().c_str());
|
||||
return Status::Corruption("Unable to decode checksum.");
|
||||
}
|
||||
|
||||
uint32_t crc = crc32c::Value(key.data(), key.size());
|
||||
crc = crc32c::Extend(crc, blob_value.data(), blob_value.size());
|
||||
uint32_t crc = crc32c::Value(blob_record.data() + sizeof(uint32_t),
|
||||
blob_record.size() - sizeof(uint32_t));
|
||||
crc = crc32c::Mask(crc); // Adjust for storage
|
||||
if (crc != crc_exp) {
|
||||
if (debug_level_ >= 2) {
|
||||
@ -1136,7 +1124,9 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
|
||||
return Status::Corruption("Corruption. Blob CRC mismatch");
|
||||
}
|
||||
|
||||
if (bfile->compression() != kNoCompression) {
|
||||
if (bfile->compression() == kNoCompression) {
|
||||
value->PinSelf(blob_value);
|
||||
} else {
|
||||
BlockContents contents;
|
||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(DefaultColumnFamily());
|
||||
{
|
||||
@ -1147,11 +1137,9 @@ Status BlobDBImpl::GetBlobValue(const Slice& key, const Slice& index_entry,
|
||||
kBlockBasedTableVersionFormat, Slice(), bfile->compression(),
|
||||
*(cfh->cfd()->ioptions()));
|
||||
}
|
||||
*(value->GetSelf()) = contents.data.ToString();
|
||||
value->PinSelf(contents.data);
|
||||
}
|
||||
|
||||
value->PinSelf();
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -1271,9 +1259,26 @@ Status BlobDBImpl::CloseBlobFileIfNeeded(std::shared_ptr<BlobFile>& bfile) {
|
||||
bool BlobDBImpl::VisibleToActiveSnapshot(
|
||||
const std::shared_ptr<BlobFile>& bfile) {
|
||||
assert(bfile->Obsolete());
|
||||
SequenceNumber first_sequence = bfile->GetSequenceRange().first;
|
||||
|
||||
// We check whether the oldest snapshot is no less than the last sequence
|
||||
// by the time the blob file become obsolete. If so, the blob file is not
|
||||
// visible to all existing snapshots.
|
||||
//
|
||||
// If we keep track of the earliest sequence of the keys in the blob file,
|
||||
// we could instead check if there's a snapshot falls in range
|
||||
// [earliest_sequence, obsolete_sequence). But doing so will make the
|
||||
// implementation more complicated.
|
||||
SequenceNumber obsolete_sequence = bfile->GetObsoleteSequence();
|
||||
return db_impl_->HasActiveSnapshotInRange(first_sequence, obsolete_sequence);
|
||||
SequenceNumber oldest_snapshot = 0;
|
||||
{
|
||||
// Need to lock DBImpl mutex before access snapshot list.
|
||||
InstrumentedMutexLock l(db_impl_->mutex());
|
||||
auto snapshots = db_impl_->snapshots();
|
||||
if (!snapshots.empty()) {
|
||||
oldest_snapshot = snapshots.oldest()->GetSequenceNumber();
|
||||
}
|
||||
}
|
||||
return oldest_snapshot < obsolete_sequence;
|
||||
}
|
||||
|
||||
bool BlobDBImpl::FindFileAndEvictABlob(uint64_t file_number, uint64_t key_size,
|
||||
@ -1431,14 +1436,6 @@ std::pair<bool, int64_t> BlobDBImpl::CheckSeqFiles(bool aborted) {
|
||||
return std::make_pair(true, -1);
|
||||
}
|
||||
|
||||
std::pair<bool, int64_t> BlobDBImpl::FsyncFiles(bool aborted) {
|
||||
if (aborted || shutdown_) {
|
||||
return std::make_pair(false, -1);
|
||||
}
|
||||
SyncBlobFiles();
|
||||
return std::make_pair(true, -1);
|
||||
}
|
||||
|
||||
Status BlobDBImpl::SyncBlobFiles() {
|
||||
MutexLock l(&write_mutex_);
|
||||
|
||||
@ -1446,35 +1443,30 @@ Status BlobDBImpl::SyncBlobFiles() {
|
||||
{
|
||||
ReadLock rl(&mutex_);
|
||||
for (auto fitr : open_ttl_files_) {
|
||||
if (fitr->NeedsFsync(true, bdb_options_.bytes_per_sync))
|
||||
process_files.push_back(fitr);
|
||||
process_files.push_back(fitr);
|
||||
}
|
||||
|
||||
if (open_non_ttl_file_ != nullptr &&
|
||||
open_non_ttl_file_->NeedsFsync(true, bdb_options_.bytes_per_sync)) {
|
||||
if (open_non_ttl_file_ != nullptr) {
|
||||
process_files.push_back(open_non_ttl_file_);
|
||||
}
|
||||
}
|
||||
|
||||
Status s;
|
||||
|
||||
for (auto& blob_file : process_files) {
|
||||
if (blob_file->NeedsFsync(true, bdb_options_.bytes_per_sync)) {
|
||||
s = blob_file->Fsync();
|
||||
if (!s.ok()) {
|
||||
ROCKS_LOG_ERROR(db_options_.info_log,
|
||||
"Failed to sync blob file %" PRIu64 ", status: %s",
|
||||
blob_file->BlobFileNumber(), s.ToString().c_str());
|
||||
return s;
|
||||
}
|
||||
s = blob_file->Fsync();
|
||||
if (!s.ok()) {
|
||||
ROCKS_LOG_ERROR(db_options_.info_log,
|
||||
"Failed to sync blob file %" PRIu64 ", status: %s",
|
||||
blob_file->BlobFileNumber(), s.ToString().c_str());
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
bool expected = true;
|
||||
if (dir_change_.compare_exchange_weak(expected, false)) {
|
||||
s = dir_ent_->Fsync();
|
||||
s = dir_ent_->Fsync();
|
||||
if (!s.ok()) {
|
||||
ROCKS_LOG_ERROR(db_options_.info_log,
|
||||
"Failed to sync blob directory, status: %s",
|
||||
s.ToString().c_str());
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -1729,7 +1721,6 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
|
||||
|
||||
WriteLock wl(&mutex_);
|
||||
|
||||
dir_change_.store(true);
|
||||
blob_files_.insert(std::make_pair(newfile->BlobFileNumber(), newfile));
|
||||
}
|
||||
|
||||
@ -1757,8 +1748,6 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
|
||||
&rewrite_batch, &callback);
|
||||
}
|
||||
if (rewrite_status.ok()) {
|
||||
newfile->ExtendSequenceRange(
|
||||
WriteBatchInternal::Sequence(&rewrite_batch));
|
||||
gc_stats->num_keys_relocated++;
|
||||
gc_stats->bytes_relocated += record.record_size();
|
||||
} else if (rewrite_status.IsBusy()) {
|
||||
@ -1775,10 +1764,7 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
|
||||
} // end of ReadRecord loop
|
||||
|
||||
if (s.ok()) {
|
||||
SequenceNumber obsolete_sequence =
|
||||
newfile == nullptr ? bfptr->GetSequenceRange().second + 1
|
||||
: newfile->GetSequenceRange().second;
|
||||
bfptr->MarkObsolete(obsolete_sequence);
|
||||
bfptr->MarkObsolete(GetLatestSequenceNumber());
|
||||
if (!first_gc) {
|
||||
WriteLock wl(&mutex_);
|
||||
obsolete_files_.push_back(bfptr);
|
||||
@ -1788,8 +1774,9 @@ Status BlobDBImpl::GCFileAndUpdateLSM(const std::shared_ptr<BlobFile>& bfptr,
|
||||
ROCKS_LOG_INFO(
|
||||
db_options_.info_log,
|
||||
"%s blob file %" PRIu64 ". Total blob records: %" PRIu64
|
||||
", Expired: %" PRIu64 " keys/%" PRIu64 " bytes, Overwritten: %" PRIu64
|
||||
" keys/%" PRIu64 " bytes.",
|
||||
", expired: %" PRIu64 " keys/%" PRIu64
|
||||
" bytes, updated or deleted by user: %" PRIu64 " keys/%" PRIu64
|
||||
" bytes, rewrite to new file: %" PRIu64 " keys/%" PRIu64 " bytes.",
|
||||
s.ok() ? "Successfully garbage collected" : "Failed to garbage collect",
|
||||
bfptr->BlobFileNumber(), gc_stats->blob_count, gc_stats->num_keys_expired,
|
||||
gc_stats->bytes_expired, gc_stats->num_keys_overwritten,
|
||||
@ -1843,14 +1830,9 @@ bool BlobDBImpl::ShouldGCFile(std::shared_ptr<BlobFile> bfile, uint64_t now,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (bdb_options_.ttl_range_secs < kPartialExpirationGCRangeSecs) {
|
||||
*reason = "has ttl but partial expiration not turned on";
|
||||
return false;
|
||||
}
|
||||
|
||||
ReadLock lockbfile_r(&bfile->mutex_);
|
||||
bool ret = ((bfile->deleted_size_ * 100.0 / bfile->file_size_.load()) >
|
||||
kPartialExpirationPercentage);
|
||||
bool ret = ((bfile->deleted_size_ / bfile->file_size_.load()) >
|
||||
bdb_options_.garbage_collection_deletion_size_threshold);
|
||||
if (ret) {
|
||||
*reason = "deleted blobs beyond threshold";
|
||||
} else {
|
||||
@ -1869,8 +1851,8 @@ bool BlobDBImpl::ShouldGCFile(std::shared_ptr<BlobFile> bfile, uint64_t now,
|
||||
ReadLock lockbfile_r(&bfile->mutex_);
|
||||
|
||||
if (bdb_options_.enable_garbage_collection) {
|
||||
if ((bfile->deleted_size_ * 100.0 / bfile->file_size_.load()) >
|
||||
kPartialExpirationPercentage) {
|
||||
if ((bfile->deleted_size_ / bfile->file_size_.load()) >
|
||||
bdb_options_.garbage_collection_deletion_size_threshold) {
|
||||
*reason = "deleted simple blobs beyond threshold";
|
||||
return true;
|
||||
}
|
||||
|
@ -144,23 +144,6 @@ class BlobDBImpl : public BlobDB {
|
||||
// how many random access open files can we tolerate
|
||||
static constexpr uint32_t kOpenFilesTrigger = 100;
|
||||
|
||||
// how many periods of stats do we keep.
|
||||
static constexpr uint32_t kWriteAmplificationStatsPeriods = 24;
|
||||
|
||||
// we will garbage collect blob files in
|
||||
// which entire files have expired. However if the
|
||||
// ttl_range of files is very large say a day, we
|
||||
// would have to wait for the entire day, before we
|
||||
// recover most of the space.
|
||||
static constexpr uint32_t kPartialExpirationGCRangeSecs = 4 * 3600;
|
||||
|
||||
// this should be based on allowed Write Amplification
|
||||
// if 50% of the space of a blob file has been deleted/expired,
|
||||
static constexpr uint32_t kPartialExpirationPercentage = 75;
|
||||
|
||||
// how often should we schedule a job to fsync open files
|
||||
static constexpr uint32_t kFSyncFilesPeriodMillisecs = 10 * 1000;
|
||||
|
||||
// how often to schedule reclaim open files.
|
||||
static constexpr uint32_t kReclaimOpenFilesPeriodMillisecs = 1 * 1000;
|
||||
|
||||
@ -228,7 +211,7 @@ class BlobDBImpl : public BlobDB {
|
||||
|
||||
Status Open(std::vector<ColumnFamilyHandle*>* handles);
|
||||
|
||||
Status SyncBlobFiles();
|
||||
Status SyncBlobFiles() override;
|
||||
|
||||
#ifndef NDEBUG
|
||||
Status TEST_GetBlobValue(const Slice& key, const Slice& index_entry,
|
||||
@ -284,7 +267,7 @@ class BlobDBImpl : public BlobDB {
|
||||
|
||||
Status PutBlobValue(const WriteOptions& options, const Slice& key,
|
||||
const Slice& value, uint64_t expiration,
|
||||
SequenceNumber sequence, WriteBatch* batch);
|
||||
WriteBatch* batch);
|
||||
|
||||
Status AppendBlob(const std::shared_ptr<BlobFile>& bfile,
|
||||
const std::string& headerbuf, const Slice& key,
|
||||
@ -313,9 +296,6 @@ class BlobDBImpl : public BlobDB {
|
||||
// Major task to garbage collect expired and deleted blobs
|
||||
std::pair<bool, int64_t> RunGC(bool aborted);
|
||||
|
||||
// asynchronous task to fsync/fdatasync the open blob files
|
||||
std::pair<bool, int64_t> FsyncFiles(bool aborted);
|
||||
|
||||
// periodically check if open blob files and their TTL's has expired
|
||||
// if expired, close the sequential writer and make the file immutable
|
||||
std::pair<bool, int64_t> CheckSeqFiles(bool aborted);
|
||||
@ -420,8 +400,6 @@ class BlobDBImpl : public BlobDB {
|
||||
// pointer to directory
|
||||
std::unique_ptr<Directory> dir_ent_;
|
||||
|
||||
std::atomic<bool> dir_change_;
|
||||
|
||||
// Read Write Mutex, which protects all the data structures
|
||||
// HEAVILY TRAFFICKED
|
||||
mutable port::RWMutex mutex_;
|
||||
|
@ -901,20 +901,18 @@ TEST_F(BlobDBTest, SnapshotAndGarbageCollection) {
|
||||
ASSERT_EQ(1, gc_stats.blob_count);
|
||||
if (delete_key) {
|
||||
ASSERT_EQ(0, gc_stats.num_keys_relocated);
|
||||
ASSERT_EQ(bfile->GetSequenceRange().second + 1,
|
||||
bfile->GetObsoleteSequence());
|
||||
} else {
|
||||
ASSERT_EQ(1, gc_stats.num_keys_relocated);
|
||||
ASSERT_EQ(blob_db_->GetLatestSequenceNumber(),
|
||||
bfile->GetObsoleteSequence());
|
||||
}
|
||||
ASSERT_EQ(blob_db_->GetLatestSequenceNumber(),
|
||||
bfile->GetObsoleteSequence());
|
||||
if (i == 3) {
|
||||
snapshot = blob_db_->GetSnapshot();
|
||||
}
|
||||
size_t num_files = delete_key ? 3 : 4;
|
||||
ASSERT_EQ(num_files, blob_db_impl()->TEST_GetBlobFiles().size());
|
||||
blob_db_impl()->TEST_DeleteObsoleteFiles();
|
||||
if (i == 0 || i == 3 || (i == 2 && delete_key)) {
|
||||
if (i == 3) {
|
||||
// The snapshot shouldn't see data in bfile
|
||||
ASSERT_EQ(num_files - 1, blob_db_impl()->TEST_GetBlobFiles().size());
|
||||
blob_db_->ReleaseSnapshot(snapshot);
|
||||
@ -1063,15 +1061,19 @@ TEST_F(BlobDBTest, OutOfSpace) {
|
||||
}
|
||||
|
||||
TEST_F(BlobDBTest, EvictOldestFileWhenCloseToSpaceLimit) {
|
||||
// Use mock env to stop wall clock.
|
||||
Options options;
|
||||
BlobDBOptions bdb_options;
|
||||
bdb_options.blob_dir_size = 270;
|
||||
bdb_options.blob_file_size = 100;
|
||||
bdb_options.disable_background_tasks = true;
|
||||
bdb_options.is_fifo = true;
|
||||
bdb_options.disable_background_tasks = true;
|
||||
Open(bdb_options);
|
||||
|
||||
std::atomic<int> evict_count{0};
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"BlobDBImpl::EvictOldestBlobFile:Evicted",
|
||||
[&](void *) { evict_count++; });
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
// Each stored blob has an overhead of 32 bytes currently.
|
||||
// So a 100 byte blob should take up 132 bytes.
|
||||
std::string value(100, 'v');
|
||||
@ -1095,6 +1097,28 @@ TEST_F(BlobDBTest, EvictOldestFileWhenCloseToSpaceLimit) {
|
||||
bdb_impl->TEST_DeleteObsoleteFiles();
|
||||
obsolete_files = bdb_impl->TEST_GetObsoleteFiles();
|
||||
ASSERT_TRUE(obsolete_files.empty());
|
||||
ASSERT_EQ(1, evict_count);
|
||||
}
|
||||
|
||||
TEST_F(BlobDBTest, NoOldestFileToEvict) {
|
||||
Options options;
|
||||
BlobDBOptions bdb_options;
|
||||
bdb_options.blob_dir_size = 1000;
|
||||
bdb_options.blob_file_size = 5000;
|
||||
bdb_options.is_fifo = true;
|
||||
bdb_options.disable_background_tasks = true;
|
||||
Open(bdb_options);
|
||||
|
||||
std::atomic<int> evict_count{0};
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"BlobDBImpl::EvictOldestBlobFile:Evicted",
|
||||
[&](void *) { evict_count++; });
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
std::string value(2000, 'v');
|
||||
ASSERT_OK(Put("foo", std::string(2000, 'v')));
|
||||
ASSERT_OK(Put("bar", std::string(2000, 'v')));
|
||||
ASSERT_EQ(0, evict_count);
|
||||
}
|
||||
|
||||
TEST_F(BlobDBTest, InlineSmallValues) {
|
||||
@ -1111,10 +1135,6 @@ TEST_F(BlobDBTest, InlineSmallValues) {
|
||||
Open(bdb_options, options);
|
||||
std::map<std::string, std::string> data;
|
||||
std::map<std::string, KeyVersion> versions;
|
||||
SequenceNumber first_non_ttl_seq = kMaxSequenceNumber;
|
||||
SequenceNumber first_ttl_seq = kMaxSequenceNumber;
|
||||
SequenceNumber last_non_ttl_seq = 0;
|
||||
SequenceNumber last_ttl_seq = 0;
|
||||
for (size_t i = 0; i < 1000; i++) {
|
||||
bool is_small_value = rnd.Next() % 2;
|
||||
bool has_ttl = rnd.Next() % 2;
|
||||
@ -1134,15 +1154,6 @@ TEST_F(BlobDBTest, InlineSmallValues) {
|
||||
versions[key] =
|
||||
KeyVersion(key, value, sequence,
|
||||
(is_small_value && !has_ttl) ? kTypeValue : kTypeBlobIndex);
|
||||
if (!is_small_value) {
|
||||
if (!has_ttl) {
|
||||
first_non_ttl_seq = std::min(first_non_ttl_seq, sequence);
|
||||
last_non_ttl_seq = std::max(last_non_ttl_seq, sequence);
|
||||
} else {
|
||||
first_ttl_seq = std::min(first_ttl_seq, sequence);
|
||||
last_ttl_seq = std::max(last_ttl_seq, sequence);
|
||||
}
|
||||
}
|
||||
}
|
||||
VerifyDB(data);
|
||||
VerifyBaseDB(versions);
|
||||
@ -1159,11 +1170,7 @@ TEST_F(BlobDBTest, InlineSmallValues) {
|
||||
ttl_file = blob_files[1];
|
||||
}
|
||||
ASSERT_FALSE(non_ttl_file->HasTTL());
|
||||
ASSERT_EQ(first_non_ttl_seq, non_ttl_file->GetSequenceRange().first);
|
||||
ASSERT_EQ(last_non_ttl_seq, non_ttl_file->GetSequenceRange().second);
|
||||
ASSERT_TRUE(ttl_file->HasTTL());
|
||||
ASSERT_EQ(first_ttl_seq, ttl_file->GetSequenceRange().first);
|
||||
ASSERT_EQ(last_ttl_seq, ttl_file->GetSequenceRange().second);
|
||||
}
|
||||
|
||||
TEST_F(BlobDBTest, CompactionFilterNotSupported) {
|
||||
|
@ -142,8 +142,6 @@ Status BlobDumpTool::DumpBlobLogFooter(uint64_t file_size,
|
||||
fprintf(stdout, " Blob count : %" PRIu64 "\n", footer.blob_count);
|
||||
fprintf(stdout, " Expiration Range : %s\n",
|
||||
GetString(footer.expiration_range).c_str());
|
||||
fprintf(stdout, " Sequence Range : %s\n",
|
||||
GetString(footer.sequence_range).c_str());
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -43,7 +43,6 @@ BlobFile::BlobFile()
|
||||
obsolete_(false),
|
||||
gc_once_after_open_(false),
|
||||
expiration_range_({0, 0}),
|
||||
sequence_range_({kMaxSequenceNumber, 0}),
|
||||
last_access_(-1),
|
||||
last_fsync_(0),
|
||||
header_valid_(false),
|
||||
@ -67,7 +66,6 @@ BlobFile::BlobFile(const BlobDBImpl* p, const std::string& bdir, uint64_t fn,
|
||||
obsolete_(false),
|
||||
gc_once_after_open_(false),
|
||||
expiration_range_({0, 0}),
|
||||
sequence_range_({kMaxSequenceNumber, 0}),
|
||||
last_access_(-1),
|
||||
last_fsync_(0),
|
||||
header_valid_(false),
|
||||
@ -116,12 +114,11 @@ std::string BlobFile::DumpState() const {
|
||||
" file_size: %" PRIu64 " deleted_count: %" PRIu64
|
||||
" deleted_size: %" PRIu64
|
||||
" closed: %d obsolete: %d expiration_range: (%" PRIu64 ", %" PRIu64
|
||||
") sequence_range: (%" PRIu64 " %" PRIu64 "), writer: %d reader: %d",
|
||||
"), writer: %d reader: %d",
|
||||
path_to_dir_.c_str(), file_number_, blob_count_.load(),
|
||||
gc_epoch_.load(), file_size_.load(), deleted_count_, deleted_size_,
|
||||
closed_.load(), obsolete_.load(), expiration_range_.first,
|
||||
expiration_range_.second, sequence_range_.first,
|
||||
sequence_range_.second, (!!log_writer_), (!!ra_file_reader_));
|
||||
expiration_range_.second, (!!log_writer_), (!!ra_file_reader_));
|
||||
return str;
|
||||
}
|
||||
|
||||
@ -144,8 +141,6 @@ Status BlobFile::WriteFooterAndCloseLocked() {
|
||||
footer.expiration_range = expiration_range_;
|
||||
}
|
||||
|
||||
footer.sequence_range = sequence_range_;
|
||||
|
||||
// this will close the file and reset the Writable File Pointer.
|
||||
Status s = log_writer_->AppendFooter(footer);
|
||||
if (s.ok()) {
|
||||
@ -185,7 +180,6 @@ Status BlobFile::SetFromFooterLocked(const BlobLogFooter& footer) {
|
||||
last_fsync_.store(file_size_);
|
||||
blob_count_ = footer.blob_count;
|
||||
expiration_range_ = footer.expiration_range;
|
||||
sequence_range_ = footer.sequence_range;
|
||||
closed_ = true;
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -84,8 +84,6 @@ class BlobFile {
|
||||
|
||||
ExpirationRange expiration_range_;
|
||||
|
||||
SequenceRange sequence_range_;
|
||||
|
||||
// Sequential/Append writer for blobs
|
||||
std::shared_ptr<Writer> log_writer_;
|
||||
|
||||
@ -177,17 +175,6 @@ class BlobFile {
|
||||
expiration_range_.second = std::max(expiration_range_.second, expiration);
|
||||
}
|
||||
|
||||
SequenceRange GetSequenceRange() const { return sequence_range_; }
|
||||
|
||||
void SetSequenceRange(SequenceRange sequence_range) {
|
||||
sequence_range_ = sequence_range;
|
||||
}
|
||||
|
||||
void ExtendSequenceRange(SequenceNumber sequence) {
|
||||
sequence_range_.first = std::min(sequence_range_.first, sequence);
|
||||
sequence_range_.second = std::max(sequence_range_.second, sequence);
|
||||
}
|
||||
|
||||
bool HasTTL() const { return has_ttl_; }
|
||||
|
||||
void SetHasTTL(bool has_ttl) { has_ttl_ = has_ttl; }
|
||||
|
@ -67,8 +67,6 @@ void BlobLogFooter::EncodeTo(std::string* dst) {
|
||||
PutFixed64(dst, blob_count);
|
||||
PutFixed64(dst, expiration_range.first);
|
||||
PutFixed64(dst, expiration_range.second);
|
||||
PutFixed64(dst, sequence_range.first);
|
||||
PutFixed64(dst, sequence_range.second);
|
||||
crc = crc32c::Value(dst->c_str(), dst->size());
|
||||
crc = crc32c::Mask(crc);
|
||||
PutFixed32(dst, crc);
|
||||
@ -82,14 +80,12 @@ Status BlobLogFooter::DecodeFrom(Slice src) {
|
||||
"Unexpected blob file footer size");
|
||||
}
|
||||
uint32_t src_crc = 0;
|
||||
src_crc = crc32c::Value(src.data(), BlobLogFooter::kSize - 4);
|
||||
src_crc = crc32c::Value(src.data(), BlobLogFooter::kSize - sizeof(uint32_t));
|
||||
src_crc = crc32c::Mask(src_crc);
|
||||
uint32_t magic_number;
|
||||
if (!GetFixed32(&src, &magic_number) || !GetFixed64(&src, &blob_count) ||
|
||||
!GetFixed64(&src, &expiration_range.first) ||
|
||||
!GetFixed64(&src, &expiration_range.second) ||
|
||||
!GetFixed64(&src, &sequence_range.first) ||
|
||||
!GetFixed64(&src, &sequence_range.second) || !GetFixed32(&src, &crc)) {
|
||||
!GetFixed64(&src, &expiration_range.second) || !GetFixed32(&src, &crc)) {
|
||||
return Status::Corruption(kErrorMessage, "Error decoding content");
|
||||
}
|
||||
if (magic_number != kMagicNumber) {
|
||||
|
@ -24,7 +24,6 @@ constexpr uint32_t kVersion1 = 1;
|
||||
constexpr uint64_t kNoExpiration = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
using ExpirationRange = std::pair<uint64_t, uint64_t>;
|
||||
using SequenceRange = std::pair<uint64_t, uint64_t>;
|
||||
|
||||
// Format of blob log file header (30 bytes):
|
||||
//
|
||||
@ -53,24 +52,23 @@ struct BlobLogHeader {
|
||||
Status DecodeFrom(Slice slice);
|
||||
};
|
||||
|
||||
// Format of blob log file footer (48 bytes):
|
||||
// Format of blob log file footer (32 bytes):
|
||||
//
|
||||
// +--------------+------------+-------------------+-------------------+------------+
|
||||
// | magic number | blob count | expiration range | sequence range | footer CRC |
|
||||
// +--------------+------------+-------------------+-------------------+------------+
|
||||
// | Fixed32 | Fixed64 | Fixed64 + Fixed64 | Fixed64 + Fixed64 | Fixed32 |
|
||||
// +--------------+------------+-------------------+-------------------+------------+
|
||||
// +--------------+------------+-------------------+------------+
|
||||
// | magic number | blob count | expiration range | footer CRC |
|
||||
// +--------------+------------+-------------------+------------+
|
||||
// | Fixed32 | Fixed64 | Fixed64 + Fixed64 | Fixed32 |
|
||||
// +--------------+------------+-------------------+------------+
|
||||
//
|
||||
// The footer will be presented only when the blob file is properly closed.
|
||||
//
|
||||
// Unlike the same field in file header, expiration range in the footer is the
|
||||
// range of smallest and largest expiration of the data in this file.
|
||||
struct BlobLogFooter {
|
||||
static constexpr size_t kSize = 48;
|
||||
static constexpr size_t kSize = 32;
|
||||
|
||||
uint64_t blob_count = 0;
|
||||
ExpirationRange expiration_range = std::make_pair(0, 0);
|
||||
SequenceRange sequence_range = std::make_pair(0, 0);
|
||||
uint32_t crc = 0;
|
||||
|
||||
void EncodeTo(std::string* dst);
|
||||
|
@ -317,10 +317,10 @@ TEST(RowValueTest, PurgeTtlShouldRemvoeAllColumnsExpired) {
|
||||
int64_t now = time(nullptr);
|
||||
|
||||
auto row_value = CreateTestRowValue({
|
||||
std::make_tuple(kColumn, 0, ToMicroSeconds(now)),
|
||||
std::make_tuple(kExpiringColumn, 1, ToMicroSeconds(now - kTtl - 10)), //expired
|
||||
std::make_tuple(kExpiringColumn, 2, ToMicroSeconds(now)), // not expired
|
||||
std::make_tuple(kTombstone, 3, ToMicroSeconds(now))
|
||||
CreateTestColumnSpec(kColumn, 0, ToMicroSeconds(now)),
|
||||
CreateTestColumnSpec(kExpiringColumn, 1, ToMicroSeconds(now - kTtl - 10)), //expired
|
||||
CreateTestColumnSpec(kExpiringColumn, 2, ToMicroSeconds(now)), // not expired
|
||||
CreateTestColumnSpec(kTombstone, 3, ToMicroSeconds(now))
|
||||
});
|
||||
|
||||
bool changed = false;
|
||||
@ -339,10 +339,10 @@ TEST(RowValueTest, ExpireTtlShouldConvertExpiredColumnsToTombstones) {
|
||||
int64_t now = time(nullptr);
|
||||
|
||||
auto row_value = CreateTestRowValue({
|
||||
std::make_tuple(kColumn, 0, ToMicroSeconds(now)),
|
||||
std::make_tuple(kExpiringColumn, 1, ToMicroSeconds(now - kTtl - 10)), //expired
|
||||
std::make_tuple(kExpiringColumn, 2, ToMicroSeconds(now)), // not expired
|
||||
std::make_tuple(kTombstone, 3, ToMicroSeconds(now))
|
||||
CreateTestColumnSpec(kColumn, 0, ToMicroSeconds(now)),
|
||||
CreateTestColumnSpec(kExpiringColumn, 1, ToMicroSeconds(now - kTtl - 10)), //expired
|
||||
CreateTestColumnSpec(kExpiringColumn, 2, ToMicroSeconds(now)), // not expired
|
||||
CreateTestColumnSpec(kTombstone, 3, ToMicroSeconds(now))
|
||||
});
|
||||
|
||||
bool changed = false;
|
||||
|
@ -145,21 +145,21 @@ TEST_F(CassandraFunctionalTest, SimpleMergeTest) {
|
||||
int64_t now = time(nullptr);
|
||||
|
||||
store.Append("k1", CreateTestRowValue({
|
||||
std::make_tuple(kTombstone, 0, ToMicroSeconds(now + 5)),
|
||||
std::make_tuple(kColumn, 1, ToMicroSeconds(now + 8)),
|
||||
std::make_tuple(kExpiringColumn, 2, ToMicroSeconds(now + 5)),
|
||||
CreateTestColumnSpec(kTombstone, 0, ToMicroSeconds(now + 5)),
|
||||
CreateTestColumnSpec(kColumn, 1, ToMicroSeconds(now + 8)),
|
||||
CreateTestColumnSpec(kExpiringColumn, 2, ToMicroSeconds(now + 5)),
|
||||
}));
|
||||
store.Append("k1",CreateTestRowValue({
|
||||
std::make_tuple(kColumn, 0, ToMicroSeconds(now + 2)),
|
||||
std::make_tuple(kExpiringColumn, 1, ToMicroSeconds(now + 5)),
|
||||
std::make_tuple(kTombstone, 2, ToMicroSeconds(now + 7)),
|
||||
std::make_tuple(kExpiringColumn, 7, ToMicroSeconds(now + 17)),
|
||||
CreateTestColumnSpec(kColumn, 0, ToMicroSeconds(now + 2)),
|
||||
CreateTestColumnSpec(kExpiringColumn, 1, ToMicroSeconds(now + 5)),
|
||||
CreateTestColumnSpec(kTombstone, 2, ToMicroSeconds(now + 7)),
|
||||
CreateTestColumnSpec(kExpiringColumn, 7, ToMicroSeconds(now + 17)),
|
||||
}));
|
||||
store.Append("k1", CreateTestRowValue({
|
||||
std::make_tuple(kExpiringColumn, 0, ToMicroSeconds(now + 6)),
|
||||
std::make_tuple(kTombstone, 1, ToMicroSeconds(now + 5)),
|
||||
std::make_tuple(kColumn, 2, ToMicroSeconds(now + 4)),
|
||||
std::make_tuple(kTombstone, 11, ToMicroSeconds(now + 11)),
|
||||
CreateTestColumnSpec(kExpiringColumn, 0, ToMicroSeconds(now + 6)),
|
||||
CreateTestColumnSpec(kTombstone, 1, ToMicroSeconds(now + 5)),
|
||||
CreateTestColumnSpec(kColumn, 2, ToMicroSeconds(now + 4)),
|
||||
CreateTestColumnSpec(kTombstone, 11, ToMicroSeconds(now + 11)),
|
||||
}));
|
||||
|
||||
auto ret = store.Get("k1");
|
||||
@ -180,16 +180,16 @@ TEST_F(CassandraFunctionalTest,
|
||||
int64_t now= time(nullptr);
|
||||
|
||||
store.Append("k1", CreateTestRowValue({
|
||||
std::make_tuple(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 20)), //expired
|
||||
std::make_tuple(kExpiringColumn, 1, ToMicroSeconds(now - kTtl + 10)), // not expired
|
||||
std::make_tuple(kTombstone, 3, ToMicroSeconds(now))
|
||||
CreateTestColumnSpec(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 20)), //expired
|
||||
CreateTestColumnSpec(kExpiringColumn, 1, ToMicroSeconds(now - kTtl + 10)), // not expired
|
||||
CreateTestColumnSpec(kTombstone, 3, ToMicroSeconds(now))
|
||||
}));
|
||||
|
||||
store.Flush();
|
||||
|
||||
store.Append("k1",CreateTestRowValue({
|
||||
std::make_tuple(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 10)), //expired
|
||||
std::make_tuple(kColumn, 2, ToMicroSeconds(now))
|
||||
CreateTestColumnSpec(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 10)), //expired
|
||||
CreateTestColumnSpec(kColumn, 2, ToMicroSeconds(now))
|
||||
}));
|
||||
|
||||
store.Flush();
|
||||
@ -213,16 +213,16 @@ TEST_F(CassandraFunctionalTest,
|
||||
int64_t now = time(nullptr);
|
||||
|
||||
store.Append("k1", CreateTestRowValue({
|
||||
std::make_tuple(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 20)), //expired
|
||||
std::make_tuple(kExpiringColumn, 1, ToMicroSeconds(now)), // not expired
|
||||
std::make_tuple(kTombstone, 3, ToMicroSeconds(now))
|
||||
CreateTestColumnSpec(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 20)), //expired
|
||||
CreateTestColumnSpec(kExpiringColumn, 1, ToMicroSeconds(now)), // not expired
|
||||
CreateTestColumnSpec(kTombstone, 3, ToMicroSeconds(now))
|
||||
}));
|
||||
|
||||
store.Flush();
|
||||
|
||||
store.Append("k1",CreateTestRowValue({
|
||||
std::make_tuple(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 10)), //expired
|
||||
std::make_tuple(kColumn, 2, ToMicroSeconds(now))
|
||||
CreateTestColumnSpec(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 10)), //expired
|
||||
CreateTestColumnSpec(kColumn, 2, ToMicroSeconds(now))
|
||||
}));
|
||||
|
||||
store.Flush();
|
||||
@ -244,14 +244,14 @@ TEST_F(CassandraFunctionalTest,
|
||||
int64_t now = time(nullptr);
|
||||
|
||||
store.Append("k1", CreateTestRowValue({
|
||||
std::make_tuple(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 20)),
|
||||
std::make_tuple(kExpiringColumn, 1, ToMicroSeconds(now - kTtl - 20)),
|
||||
CreateTestColumnSpec(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 20)),
|
||||
CreateTestColumnSpec(kExpiringColumn, 1, ToMicroSeconds(now - kTtl - 20)),
|
||||
}));
|
||||
|
||||
store.Flush();
|
||||
|
||||
store.Append("k1",CreateTestRowValue({
|
||||
std::make_tuple(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 10)),
|
||||
CreateTestColumnSpec(kExpiringColumn, 0, ToMicroSeconds(now - kTtl - 10)),
|
||||
}));
|
||||
|
||||
store.Flush();
|
||||
@ -266,18 +266,18 @@ TEST_F(CassandraFunctionalTest,
|
||||
int64_t now = time(nullptr);
|
||||
|
||||
store.Append("k1", CreateTestRowValue({
|
||||
std::make_tuple(kTombstone, 0, ToMicroSeconds(now - gc_grace_period_in_seconds_ - 1)),
|
||||
std::make_tuple(kColumn, 1, ToMicroSeconds(now))
|
||||
CreateTestColumnSpec(kTombstone, 0, ToMicroSeconds(now - gc_grace_period_in_seconds_ - 1)),
|
||||
CreateTestColumnSpec(kColumn, 1, ToMicroSeconds(now))
|
||||
}));
|
||||
|
||||
store.Append("k2", CreateTestRowValue({
|
||||
std::make_tuple(kColumn, 0, ToMicroSeconds(now))
|
||||
CreateTestColumnSpec(kColumn, 0, ToMicroSeconds(now))
|
||||
}));
|
||||
|
||||
store.Flush();
|
||||
|
||||
store.Append("k1",CreateTestRowValue({
|
||||
std::make_tuple(kColumn, 1, ToMicroSeconds(now)),
|
||||
CreateTestColumnSpec(kColumn, 1, ToMicroSeconds(now)),
|
||||
}));
|
||||
|
||||
store.Flush();
|
||||
@ -296,7 +296,7 @@ TEST_F(CassandraFunctionalTest, CompactionShouldRemoveTombstoneFromPut) {
|
||||
int64_t now = time(nullptr);
|
||||
|
||||
store.Put("k1", CreateTestRowValue({
|
||||
std::make_tuple(kTombstone, 0, ToMicroSeconds(now - gc_grace_period_in_seconds_ - 1)),
|
||||
CreateTestColumnSpec(kTombstone, 0, ToMicroSeconds(now - gc_grace_period_in_seconds_ - 1)),
|
||||
}));
|
||||
|
||||
store.Flush();
|
||||
|
@ -15,27 +15,27 @@ TEST(RowValueMergeTest, Merge) {
|
||||
std::vector<RowValue> row_values;
|
||||
row_values.push_back(
|
||||
CreateTestRowValue({
|
||||
std::make_tuple(kTombstone, 0, 5),
|
||||
std::make_tuple(kColumn, 1, 8),
|
||||
std::make_tuple(kExpiringColumn, 2, 5),
|
||||
CreateTestColumnSpec(kTombstone, 0, 5),
|
||||
CreateTestColumnSpec(kColumn, 1, 8),
|
||||
CreateTestColumnSpec(kExpiringColumn, 2, 5),
|
||||
})
|
||||
);
|
||||
|
||||
row_values.push_back(
|
||||
CreateTestRowValue({
|
||||
std::make_tuple(kColumn, 0, 2),
|
||||
std::make_tuple(kExpiringColumn, 1, 5),
|
||||
std::make_tuple(kTombstone, 2, 7),
|
||||
std::make_tuple(kExpiringColumn, 7, 17),
|
||||
CreateTestColumnSpec(kColumn, 0, 2),
|
||||
CreateTestColumnSpec(kExpiringColumn, 1, 5),
|
||||
CreateTestColumnSpec(kTombstone, 2, 7),
|
||||
CreateTestColumnSpec(kExpiringColumn, 7, 17),
|
||||
})
|
||||
);
|
||||
|
||||
row_values.push_back(
|
||||
CreateTestRowValue({
|
||||
std::make_tuple(kExpiringColumn, 0, 6),
|
||||
std::make_tuple(kTombstone, 1, 5),
|
||||
std::make_tuple(kColumn, 2, 4),
|
||||
std::make_tuple(kTombstone, 11, 11),
|
||||
CreateTestColumnSpec(kExpiringColumn, 0, 6),
|
||||
CreateTestColumnSpec(kTombstone, 1, 5),
|
||||
CreateTestColumnSpec(kColumn, 2, 4),
|
||||
CreateTestColumnSpec(kTombstone, 11, 11),
|
||||
})
|
||||
);
|
||||
|
||||
@ -60,24 +60,24 @@ TEST(RowValueMergeTest, MergeWithRowTombstone) {
|
||||
// This row's timestamp is smaller than tombstone.
|
||||
row_values.push_back(
|
||||
CreateTestRowValue({
|
||||
std::make_tuple(kColumn, 0, 5),
|
||||
std::make_tuple(kColumn, 1, 6),
|
||||
CreateTestColumnSpec(kColumn, 0, 5),
|
||||
CreateTestColumnSpec(kColumn, 1, 6),
|
||||
})
|
||||
);
|
||||
|
||||
// Some of the column's row is smaller, some is larger.
|
||||
row_values.push_back(
|
||||
CreateTestRowValue({
|
||||
std::make_tuple(kColumn, 2, 10),
|
||||
std::make_tuple(kColumn, 3, 12),
|
||||
CreateTestColumnSpec(kColumn, 2, 10),
|
||||
CreateTestColumnSpec(kColumn, 3, 12),
|
||||
})
|
||||
);
|
||||
|
||||
// All of the column's rows are larger than tombstone.
|
||||
row_values.push_back(
|
||||
CreateTestRowValue({
|
||||
std::make_tuple(kColumn, 4, 13),
|
||||
std::make_tuple(kColumn, 5, 14),
|
||||
CreateTestColumnSpec(kColumn, 4, 13),
|
||||
CreateTestColumnSpec(kColumn, 5, 14),
|
||||
})
|
||||
);
|
||||
|
||||
|
@ -129,7 +129,7 @@ std::shared_ptr<Tombstone> ExpiringColumn::ToTombstone() const {
|
||||
int64_t marked_for_delete_at =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(expired_at).count();
|
||||
return std::make_shared<Tombstone>(
|
||||
ColumnTypeMask::DELETION_MASK,
|
||||
static_cast<int8_t>(ColumnTypeMask::DELETION_MASK),
|
||||
Index(),
|
||||
local_deletion_time,
|
||||
marked_for_delete_at);
|
||||
|
@ -29,6 +29,12 @@ std::shared_ptr<ColumnBase> CreateTestColumn(int8_t mask,
|
||||
}
|
||||
}
|
||||
|
||||
std::tuple<int8_t, int8_t, int64_t> CreateTestColumnSpec(int8_t mask,
|
||||
int8_t index,
|
||||
int64_t timestamp) {
|
||||
return std::make_tuple(mask, index, timestamp);
|
||||
}
|
||||
|
||||
RowValue CreateTestRowValue(
|
||||
std::vector<std::tuple<int8_t, int8_t, int64_t>> column_specs) {
|
||||
std::vector<std::shared_ptr<ColumnBase>> columns;
|
||||
|
@ -23,6 +23,10 @@ std::shared_ptr<ColumnBase> CreateTestColumn(int8_t mask,
|
||||
int8_t index,
|
||||
int64_t timestamp);
|
||||
|
||||
std::tuple<int8_t, int8_t, int64_t> CreateTestColumnSpec(int8_t mask,
|
||||
int8_t index,
|
||||
int64_t timestamp);
|
||||
|
||||
RowValue CreateTestRowValue(
|
||||
std::vector<std::tuple<int8_t, int8_t, int64_t>> column_specs);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user