Merge branch 'master' into columnfamilies
Conflicts: db/db_impl.cc db/db_impl_readonly.cc db/db_test.cc db/version_edit.cc db/version_edit.h db/version_set.cc db/version_set.h db/version_set_reduce_num_levels.cc
This commit is contained in:
commit
23f6791c9e
.clang-formatMakefile
build_tools
db
compaction.cccompaction.hcompaction_picker.cccompaction_picker.hdb_bench.ccdb_filesnapshot.ccdb_impl.ccdb_impl.hdb_impl_readonly.ccdb_statistics.ccdb_statistics.hdb_stats_logger.ccdb_test.ccmemtable.ccmemtable.hmerge_helper.ccrepair.ccsimple_table_db_test.ccversion_edit.ccversion_edit.hversion_edit_test.ccversion_set.ccversion_set.hversion_set_reduce_num_levels.ccwrite_batch.ccwrite_batch_test.cc
include/rocksdb
table
tools
util
@ -2,46 +2,4 @@
|
|||||||
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
|
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
|
||||||
---
|
---
|
||||||
BasedOnStyle: Google
|
BasedOnStyle: Google
|
||||||
AccessModifierOffset: -1
|
|
||||||
ConstructorInitializerIndentWidth: 4
|
|
||||||
AlignEscapedNewlinesLeft: true
|
|
||||||
AlignTrailingComments: true
|
|
||||||
AllowAllParametersOfDeclarationOnNextLine: true
|
|
||||||
AllowShortIfStatementsOnASingleLine: false
|
|
||||||
AllowShortLoopsOnASingleLine: false
|
|
||||||
AlwaysBreakTemplateDeclarations: true
|
|
||||||
AlwaysBreakBeforeMultilineStrings: true
|
|
||||||
BreakBeforeBinaryOperators: false
|
|
||||||
BreakConstructorInitializersBeforeComma: false
|
|
||||||
BinPackParameters: false
|
|
||||||
ColumnLimit: 80
|
|
||||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
|
||||||
DerivePointerBinding: true
|
|
||||||
ExperimentalAutoDetectBinPacking: true
|
|
||||||
IndentCaseLabels: false
|
|
||||||
MaxEmptyLinesToKeep: 1
|
|
||||||
NamespaceIndentation: None
|
|
||||||
ObjCSpaceBeforeProtocolList: false
|
|
||||||
PenaltyBreakBeforeFirstCallParameter: 10
|
|
||||||
PenaltyBreakComment: 60
|
|
||||||
PenaltyBreakString: 1000
|
|
||||||
PenaltyBreakFirstLessLess: 20
|
|
||||||
PenaltyExcessCharacter: 1000000
|
|
||||||
PenaltyReturnTypeOnItsOwnLine: 200
|
|
||||||
PointerBindsToType: true
|
|
||||||
SpacesBeforeTrailingComments: 2
|
|
||||||
Cpp11BracedListStyle: true
|
|
||||||
Standard: Cpp11
|
|
||||||
IndentWidth: 2
|
|
||||||
TabWidth: 8
|
|
||||||
UseTab: Never
|
|
||||||
BreakBeforeBraces: Attach
|
|
||||||
IndentFunctionDeclarationAfterType: false
|
|
||||||
SpacesInParentheses: false
|
|
||||||
SpacesInAngles: false
|
|
||||||
SpaceInEmptyParentheses: false
|
|
||||||
SpacesInCStyleCastParentheses: false
|
|
||||||
SpaceAfterControlStatementKeyword: true
|
|
||||||
SpaceBeforeAssignmentOperators: true
|
|
||||||
ContinuationIndentWidth: 4
|
|
||||||
...
|
...
|
||||||
|
19
Makefile
19
Makefile
@ -128,19 +128,21 @@ $(SHARED2): $(SHARED3)
|
|||||||
ln -fs $(SHARED3) $(SHARED2)
|
ln -fs $(SHARED3) $(SHARED2)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(SHARED3):
|
$(SHARED3): $(LIBOBJECTS)
|
||||||
$(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(COVERAGEFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $@ $(LDFLAGS)
|
$(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(LDFLAGS) $(SOURCES)-o $@
|
||||||
|
|
||||||
endif # PLATFORM_SHARED_EXT
|
endif # PLATFORM_SHARED_EXT
|
||||||
|
|
||||||
all: $(LIBRARY) $(PROGRAMS)
|
all: $(LIBRARY) $(PROGRAMS)
|
||||||
|
|
||||||
.PHONY: blackbox_crash_test check clean coverage crash_test ldb_tests \
|
.PHONY: blackbox_crash_test check clean coverage crash_test ldb_tests \
|
||||||
release tags valgrind_check whitebox_crash_test
|
release tags valgrind_check whitebox_crash_test format
|
||||||
|
|
||||||
|
# Will also generate shared libraries.
|
||||||
release:
|
release:
|
||||||
$(MAKE) clean
|
$(MAKE) clean
|
||||||
OPT=-DNDEBUG $(MAKE) -j32
|
OPT=-DNDEBUG $(MAKE) all -j32
|
||||||
|
OPT=-DNDEBUG $(MAKE) $(SHARED) -j32
|
||||||
|
|
||||||
coverage:
|
coverage:
|
||||||
$(MAKE) clean
|
$(MAKE) clean
|
||||||
@ -197,6 +199,9 @@ tags:
|
|||||||
ctags * -R
|
ctags * -R
|
||||||
cscope -b `find . -name '*.cc'` `find . -name '*.h'`
|
cscope -b `find . -name '*.cc'` `find . -name '*.h'`
|
||||||
|
|
||||||
|
format:
|
||||||
|
build_tools/format-diff.sh
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Unit tests and tools
|
# Unit tests and tools
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@ -415,6 +420,12 @@ DEPFILES = $(filter-out util/build_version.d,$(SOURCES:.cc=.d))
|
|||||||
|
|
||||||
depend: $(DEPFILES)
|
depend: $(DEPFILES)
|
||||||
|
|
||||||
|
# if the make goal is either "clean" or "format", we shouldn't
|
||||||
|
# try to import the *.d files.
|
||||||
|
# TODO(kailiu) The unfamiliarity of Make's conditions leads to the ugly
|
||||||
|
# working solution.
|
||||||
ifneq ($(MAKECMDGOALS),clean)
|
ifneq ($(MAKECMDGOALS),clean)
|
||||||
|
ifneq ($(MAKECMDGOALS),format)
|
||||||
-include $(DEPFILES)
|
-include $(DEPFILES)
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
@ -81,9 +81,9 @@ PLATFORM_CCFLAGS=
|
|||||||
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}"
|
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}"
|
||||||
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS"
|
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS"
|
||||||
PLATFORM_SHARED_EXT="so"
|
PLATFORM_SHARED_EXT="so"
|
||||||
PLATFORM_SHARED_LDFLAGS="${EXEC_LDFLAGS_SHARED} -shared -Wl,-soname -Wl,"
|
PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl,"
|
||||||
PLATFORM_SHARED_CFLAGS="-fPIC"
|
PLATFORM_SHARED_CFLAGS="-fPIC"
|
||||||
PLATFORM_SHARED_VERSIONED=true
|
PLATFORM_SHARED_VERSIONED=false
|
||||||
|
|
||||||
# generic port files (working on all platform by #ifdef) go directly in /port
|
# generic port files (working on all platform by #ifdef) go directly in /port
|
||||||
GENERIC_PORT_FILES=`find $ROCKSDB_ROOT/port -name '*.cc' | tr "\n" " "`
|
GENERIC_PORT_FILES=`find $ROCKSDB_ROOT/port -name '*.cc' | tr "\n" " "`
|
||||||
|
@ -60,7 +60,7 @@ AR=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ar
|
|||||||
RANLIB=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ranlib
|
RANLIB=$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/ranlib
|
||||||
|
|
||||||
CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic"
|
CFLAGS="-B$TOOLCHAIN_EXECUTABLES/binutils/binutils-2.21.1/da39a3e/bin/gold -m64 -mtune=generic"
|
||||||
CFLAGS+=" -nostdlib $LIBGCC_INCLUDE $GLIBC_INCLUDE"
|
CFLAGS+=" $LIBGCC_INCLUDE $GLIBC_INCLUDE"
|
||||||
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_ATOMIC_PRESENT -DROCKSDB_FALLOCATE_PRESENT"
|
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_ATOMIC_PRESENT -DROCKSDB_FALLOCATE_PRESENT"
|
||||||
CFLAGS+=" -DSNAPPY -DGFLAGS -DZLIB -DBZIP2"
|
CFLAGS+=" -DSNAPPY -DGFLAGS -DZLIB -DBZIP2"
|
||||||
|
|
||||||
|
109
build_tools/format-diff.sh
Executable file
109
build_tools/format-diff.sh
Executable file
@ -0,0 +1,109 @@
|
|||||||
|
#!/bin/bash
# Checks whether the lines touched by the current change follow the project's
# clang-format rules and, on request, reformats them in place.
#
# Which change is inspected:
#   * uncommitted changes exist -> `git diff HEAD` (pre-commit check)
#   * working tree is clean     -> `git diff HEAD^` (post-commit check of the
#     latest commit, which may optionally be amended afterwards)
#
# Exit codes: 0 nothing to reformat / files reformatted, 1 user declined the
# fix, 128 clang-format-diff.py missing, 129 python `argparse` missing.

# If clang_format_diff.py command is not specified, we assume we are able to
# access directly without any path.
if [ -z "$CLANG_FORMAT_DIFF" ]
then
  CLANG_FORMAT_DIFF="clang-format-diff.py"
fi

# Check clang-format-diff.py
# ($CLANG_FORMAT_DIFF is deliberately left unquoted here and in the pipelines
# below, exactly as before, so that word splitting of the value is unchanged.)
if ! which $CLANG_FORMAT_DIFF &> /dev/null
then
  echo "You didn't have clang-format-diff.py available in your computer!"
  echo "You can download it by running: "
  echo "    curl http://goo.gl/iUW1u2"
  exit 128
fi

# Check argparse, a library that clang-format-diff.py requires.
python 2>/dev/null << EOF
import argparse
EOF

if [ "$?" != 0 ]
then
  # The inner quotes are escaped so that they are actually printed.
  echo "To run clang-format-diff.py, we'll need the library \"argparse\" to be"
  echo "installed. You can try either of the follow ways to install it:"
  echo "  1. Manually download argparse: https://pypi.python.org/pypi/argparse"
  echo "  2. easy_install argparse (if you have easy_install)"
  echo "  3. pip install argparse (if you have pip)"
  exit 129
fi

# TODO(kailiu) following work is not complete since we still need to figure
# out how to add the modified files done pre-commit hook to git's commit index.
#
# Check if this script has already been added to pre-commit hook.
# Will suggest user to add this script to pre-commit hook if their pre-commit
# is empty.
# PRE_COMMIT_SCRIPT_PATH="`git rev-parse --show-toplevel`/.git/hooks/pre-commit"
# if ! ls $PRE_COMMIT_SCRIPT_PATH &> /dev/null
# then
#   echo "Would you like to add this script to pre-commit hook, which will do "
#   echo -n "the format check for all the affected lines before you check in (y/n):"
#   read add_to_hook
#   if [ "$add_to_hook" == "y" ]
#   then
#     ln -s `git rev-parse --show-toplevel`/build_tools/format-diff.sh $PRE_COMMIT_SCRIPT_PATH
#   fi
# fi

set -e

uncommitted_code=`git diff HEAD`

# If there's no uncommitted changes, we assume user are doing post-commit
# format check, in which case we'll check the modified lines from latest commit.
# Otherwise, we'll check format of the uncommitted code only.
#
# The chosen base revision is remembered so that the in-place fix further down
# reformats exactly the same set of lines that was checked here.
if [ -z "$uncommitted_code" ]
then
  # Check the format of last commit
  diff_base="HEAD^"
else
  # Check the format of uncommitted lines,
  diff_base="HEAD"
fi
diffs=$(git diff -U0 $diff_base | $CLANG_FORMAT_DIFF -p 1)

if [ -z "$diffs" ]
then
  echo "Nothing needs to be reformatted!"
  exit 0
fi

# Highlight the insertion/deletion from the clang-format-diff.py's output
COLOR_END="\033[0m"
COLOR_RED="\033[0;31m"
COLOR_GREEN="\033[0;32m"

echo -e "Detect lines that doesn't follow the format rules:\r"
# Add the color to the diff. lines added will be green; lines removed will be red.
echo "$diffs" |
  sed -e "s/\(^-.*$\)/`echo -e \"$COLOR_RED\1$COLOR_END\"`/" |
  sed -e "s/\(^+.*$\)/`echo -e \"$COLOR_GREEN\1$COLOR_END\"`/"
echo -e "Would you like to fix the format automatically (y/n): \c"

# Make sure under any mode, we can read user input.
exec < /dev/tty
read to_fix

if [ "$to_fix" != "y" ]
then
  exit 1
fi

# Do in-place format adjustment, against the same base that was checked above.
# (Previously this always used HEAD^, which in the uncommitted case would also
# reformat lines of the last commit that were never shown to the user.)
git diff -U0 $diff_base | $CLANG_FORMAT_DIFF -i -p 1
echo "Files reformatted!"

# Amend to last commit if user do the post-commit format check
if [ -z "$uncommitted_code" ]; then
  echo -e "Would you like to amend the changes to last commit (`git log HEAD --oneline | head -1`)? (y/n): \c"
  read to_amend

  if [ "$to_amend" == "y" ]
  then
    git commit -a --amend --reuse-message HEAD
    echo "Amended to last commit"
  fi
fi
|
214
db/compaction.cc
Normal file
214
db/compaction.cc
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "db/compaction.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
// Adds up file_size over "files". Summation stops at the first null entry,
// if there is one; entries after it are not counted.
static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  uint64_t total_bytes = 0;
  for (FileMetaData* file : files) {
    if (!file) {
      break;
    }
    total_bytes += file->file_size;
  }
  return total_bytes;
}
|
||||||
|
|
||||||
|
// Creates a compaction that reads from "level" of "input_version" and writes
// to "out_level".
//
// The constructor takes a reference on input_version (dropped again by
// ReleaseInputs() or by the destructor) and allocates the VersionEdit that is
// handed out by edit() and freed by the destructor.
//
// target_file_size becomes the maximum output file size and
// max_grandparent_overlap_bytes the overlap limit consulted by
// IsTrivialMove() and ShouldStopBefore().
Compaction::Compaction(Version* input_version, int level, int out_level,
                       uint64_t target_file_size,
                       uint64_t max_grandparent_overlap_bytes,
                       bool seek_compaction, bool enable_compression)
    : level_(level),
      out_level_(out_level),
      max_output_file_size_(target_file_size),
      maxGrandParentOverlapBytes_(max_grandparent_overlap_bytes),
      input_version_(input_version),
      number_levels_(input_version_->NumberLevels()),
      seek_compaction_(seek_compaction),
      enable_compression_(enable_compression),
      grandparent_index_(0),
      seen_key_(false),
      overlapped_bytes_(0),
      base_index_(-1),
      parent_index_(-1),
      score_(0),
      bottommost_level_(false),
      is_full_compaction_(false),
      // One cursor per level. The sized vector constructor value-initializes
      // its elements, so every cursor starts at 0 without an explicit loop.
      level_ptrs_(number_levels_) {
  input_version_->Ref();
  edit_ = new VersionEdit();
}
|
||||||
|
|
||||||
|
// Frees the VersionEdit owned by this compaction and, unless ReleaseInputs()
// has already cleared it, drops the reference held on the input version.
Compaction::~Compaction() {
  delete edit_;
  if (input_version_) {
    input_version_->Unref();
  }
}
|
||||||
|
|
||||||
|
bool Compaction::IsTrivialMove() const {
|
||||||
|
// Avoid a move if there is lots of overlapping grandparent data.
|
||||||
|
// Otherwise, the move could create a parent file that will require
|
||||||
|
// a very expensive merge later on.
|
||||||
|
// If level_== out_level_, the purpose is to force compaction filter to be
|
||||||
|
// applied to that level, and thus cannot be a trivia move.
|
||||||
|
return (level_ != out_level_ &&
|
||||||
|
num_input_files(0) == 1 &&
|
||||||
|
num_input_files(1) == 0 &&
|
||||||
|
TotalFileSize(grandparents_) <= maxGrandParentOverlapBytes_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Records in *edit the deletion of every input file of this compaction:
// inputs_[0] at level_ and inputs_[1] at level_ + 1.
void Compaction::AddInputDeletions(VersionEdit* edit) {
  for (int which = 0; which < 2; which++) {
    const int input_level = level_ + which;
    for (FileMetaData* file : inputs_[which]) {
      edit->DeleteFile(input_level, file->number);
    }
  }
}
|
||||||
|
|
||||||
|
bool Compaction::IsBaseLevelForKey(const Slice& user_key) {
|
||||||
|
if (input_version_->vset_->options_->compaction_style ==
|
||||||
|
kCompactionStyleUniversal) {
|
||||||
|
return bottommost_level_;
|
||||||
|
}
|
||||||
|
// Maybe use binary search to find right entry instead of linear search?
|
||||||
|
const Comparator* user_cmp = input_version_->vset_->icmp_.user_comparator();
|
||||||
|
for (int lvl = level_ + 2; lvl < number_levels_; lvl++) {
|
||||||
|
const std::vector<FileMetaData*>& files = input_version_->files_[lvl];
|
||||||
|
for (; level_ptrs_[lvl] < files.size(); ) {
|
||||||
|
FileMetaData* f = files[level_ptrs_[lvl]];
|
||||||
|
if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
|
||||||
|
// We've advanced far enough
|
||||||
|
if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
|
||||||
|
// Key falls in this file's range, so definitely not base level
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
level_ptrs_[lvl]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Compaction::ShouldStopBefore(const Slice& internal_key) {
|
||||||
|
// Scan to find earliest grandparent file that contains key.
|
||||||
|
const InternalKeyComparator* icmp = &input_version_->vset_->icmp_;
|
||||||
|
while (grandparent_index_ < grandparents_.size() &&
|
||||||
|
icmp->Compare(internal_key,
|
||||||
|
grandparents_[grandparent_index_]->largest.Encode()) > 0) {
|
||||||
|
if (seen_key_) {
|
||||||
|
overlapped_bytes_ += grandparents_[grandparent_index_]->file_size;
|
||||||
|
}
|
||||||
|
assert(grandparent_index_ + 1 >= grandparents_.size() ||
|
||||||
|
icmp->Compare(grandparents_[grandparent_index_]->largest.Encode(),
|
||||||
|
grandparents_[grandparent_index_+1]->smallest.Encode())
|
||||||
|
< 0);
|
||||||
|
grandparent_index_++;
|
||||||
|
}
|
||||||
|
seen_key_ = true;
|
||||||
|
|
||||||
|
if (overlapped_bytes_ > maxGrandParentOverlapBytes_) {
|
||||||
|
// Too much overlap for current output; start new output
|
||||||
|
overlapped_bytes_ = 0;
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark (or clear) each file that is being compacted
|
||||||
|
void Compaction::MarkFilesBeingCompacted(bool value) {
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
std::vector<FileMetaData*> v = inputs_[i];
|
||||||
|
for (unsigned int j = 0; j < inputs_[i].size(); j++) {
|
||||||
|
assert(value ? !inputs_[i][j]->being_compacted :
|
||||||
|
inputs_[i][j]->being_compacted);
|
||||||
|
inputs_[i][j]->being_compacted = value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Is this compaction producing files at the bottommost level?
|
||||||
|
void Compaction::SetupBottomMostLevel(bool isManual) {
|
||||||
|
if (input_version_->vset_->options_->compaction_style ==
|
||||||
|
kCompactionStyleUniversal) {
|
||||||
|
// If universal compaction style is used and manual
|
||||||
|
// compaction is occuring, then we are guaranteed that
|
||||||
|
// all files will be picked in a single compaction
|
||||||
|
// run. We can safely set bottommost_level_ = true.
|
||||||
|
// If it is not manual compaction, then bottommost_level_
|
||||||
|
// is already set when the Compaction was created.
|
||||||
|
if (isManual) {
|
||||||
|
bottommost_level_ = true;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
bottommost_level_ = true;
|
||||||
|
int num_levels = input_version_->vset_->NumberLevels();
|
||||||
|
for (int i = output_level() + 1; i < num_levels; i++) {
|
||||||
|
if (input_version_->NumLevelFiles(i) > 0) {
|
||||||
|
bottommost_level_ = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Compaction::ReleaseInputs() {
|
||||||
|
if (input_version_ != nullptr) {
|
||||||
|
input_version_->Unref();
|
||||||
|
input_version_ = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forwards to the input version, asking it to reset the next-compaction
// index it keeps for this compaction's input level.
void Compaction::ResetNextCompactionIndex() {
  const int input_level = level_;
  input_version_->ResetNextCompactionIndex(input_level);
}
|
||||||
|
|
||||||
|
// Writes "number(file_size) " for each entry of "files" into "output", which
// has room for "len" bytes, stopping before the first entry that does not
// fit completely in the remaining space.
//
// "output" always holds a NUL-terminated string on return (empty when
// "files" is empty), so callers can print it with %s unconditionally.
// Nothing is written when "output" is null or "len" is not positive.
static void InputSummary(std::vector<FileMetaData*>& files, char* output,
                         int len) {
  if (output == nullptr || len <= 0) {
    return;
  }
  // Without this an empty "files" would leave the buffer untouched.
  output[0] = '\0';

  int written = 0;
  for (size_t i = 0; i < files.size(); i++) {
    const int remaining = len - written;
    const int ret = snprintf(output + written, remaining, "%lu(%lu) ",
                             (unsigned long)files[i]->number,
                             (unsigned long)files[i]->file_size);
    // ret >= remaining means the entry was truncated; stop here.
    if (ret < 0 || ret >= remaining) {
      break;
    }
    written += ret;
  }
}
|
||||||
|
|
||||||
|
void Compaction::Summary(char* output, int len) {
|
||||||
|
int write = snprintf(output, len,
|
||||||
|
"Base version %lu Base level %d, seek compaction:%d, inputs:",
|
||||||
|
(unsigned long)input_version_->GetVersionNumber(),
|
||||||
|
level_,
|
||||||
|
seek_compaction_);
|
||||||
|
if (write < 0 || write > len) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
char level_low_summary[100];
|
||||||
|
InputSummary(inputs_[0], level_low_summary, sizeof(level_low_summary));
|
||||||
|
char level_up_summary[100];
|
||||||
|
if (inputs_[1].size()) {
|
||||||
|
InputSummary(inputs_[1], level_up_summary, sizeof(level_up_summary));
|
||||||
|
} else {
|
||||||
|
level_up_summary[0] = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(output + write, len - write, "[%s],[%s]",
|
||||||
|
level_low_summary, level_up_summary);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
134
db/compaction.h
Normal file
134
db/compaction.h
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "db/version_set.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
class Version;
|
||||||
|
|
||||||
|
// A Compaction encapsulates information about a compaction.
|
||||||
|
class Compaction {
|
||||||
|
public:
|
||||||
|
~Compaction();
|
||||||
|
|
||||||
|
// Return the level that is being compacted. Inputs from "level"
|
||||||
|
// will be merged.
|
||||||
|
int level() const { return level_; }
|
||||||
|
|
||||||
|
// Outputs will go to this level
|
||||||
|
int output_level() const { return out_level_; }
|
||||||
|
|
||||||
|
// Return the object that holds the edits to the descriptor done
|
||||||
|
// by this compaction.
|
||||||
|
VersionEdit* edit() { return edit_; }
|
||||||
|
|
||||||
|
// "which" must be either 0 or 1
|
||||||
|
int num_input_files(int which) const { return inputs_[which].size(); }
|
||||||
|
|
||||||
|
// Return the ith input file at "level()+which" ("which" must be 0 or 1).
|
||||||
|
FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
|
||||||
|
|
||||||
|
// Maximum size of files to build during this compaction.
|
||||||
|
uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
|
||||||
|
|
||||||
|
// Whether compression will be enabled for compaction outputs
|
||||||
|
bool enable_compression() const { return enable_compression_; }
|
||||||
|
|
||||||
|
// Is this a trivial compaction that can be implemented by just
|
||||||
|
// moving a single input file to the next level (no merging or splitting)
|
||||||
|
bool IsTrivialMove() const;
|
||||||
|
|
||||||
|
// Add all inputs to this compaction as delete operations to *edit.
|
||||||
|
void AddInputDeletions(VersionEdit* edit);
|
||||||
|
|
||||||
|
// Returns true if the information we have available guarantees that
|
||||||
|
// the compaction is producing data in "level+1" for which no data exists
|
||||||
|
// in levels greater than "level+1".
|
||||||
|
bool IsBaseLevelForKey(const Slice& user_key);
|
||||||
|
|
||||||
|
// Returns true iff we should stop building the current output
|
||||||
|
// before processing "internal_key".
|
||||||
|
bool ShouldStopBefore(const Slice& internal_key);
|
||||||
|
|
||||||
|
// Release the input version for the compaction, once the compaction
|
||||||
|
// is successful.
|
||||||
|
void ReleaseInputs();
|
||||||
|
|
||||||
|
void Summary(char* output, int len);
|
||||||
|
|
||||||
|
// Return the score that was used to pick this compaction run.
|
||||||
|
double score() const { return score_; }
|
||||||
|
|
||||||
|
// Is this compaction creating a file in the bottom most level?
|
||||||
|
bool BottomMostLevel() { return bottommost_level_; }
|
||||||
|
|
||||||
|
// Does this compaction include all sst files?
|
||||||
|
bool IsFullCompaction() { return is_full_compaction_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
friend class Version;
|
||||||
|
friend class VersionSet;
|
||||||
|
friend class CompactionPicker;
|
||||||
|
friend class UniversalCompactionPicker;
|
||||||
|
friend class LevelCompactionPicker;
|
||||||
|
|
||||||
|
Compaction(Version* input_version, int level, int out_level,
|
||||||
|
uint64_t target_file_size, uint64_t max_grandparent_overlap_bytes,
|
||||||
|
bool seek_compaction = false, bool enable_compression = true);
|
||||||
|
|
||||||
|
int level_;
|
||||||
|
int out_level_; // levels to which output files are stored
|
||||||
|
uint64_t max_output_file_size_;
|
||||||
|
uint64_t maxGrandParentOverlapBytes_;
|
||||||
|
Version* input_version_;
|
||||||
|
VersionEdit* edit_;
|
||||||
|
int number_levels_;
|
||||||
|
|
||||||
|
bool seek_compaction_;
|
||||||
|
bool enable_compression_;
|
||||||
|
|
||||||
|
// Each compaction reads inputs from "level_" and "level_+1"
|
||||||
|
std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
|
||||||
|
|
||||||
|
// State used to check for number of of overlapping grandparent files
|
||||||
|
// (parent == level_ + 1, grandparent == level_ + 2)
|
||||||
|
std::vector<FileMetaData*> grandparents_;
|
||||||
|
size_t grandparent_index_; // Index in grandparent_starts_
|
||||||
|
bool seen_key_; // Some output key has been seen
|
||||||
|
uint64_t overlapped_bytes_; // Bytes of overlap between current output
|
||||||
|
// and grandparent files
|
||||||
|
int base_index_; // index of the file in files_[level_]
|
||||||
|
int parent_index_; // index of some file with same range in files_[level_+1]
|
||||||
|
double score_; // score that was used to pick this compaction.
|
||||||
|
|
||||||
|
// Is this compaction creating a file in the bottom most level?
|
||||||
|
bool bottommost_level_;
|
||||||
|
// Does this compaction include all sst files?
|
||||||
|
bool is_full_compaction_;
|
||||||
|
|
||||||
|
// level_ptrs_ holds indices into input_version_->levels_: our state
|
||||||
|
// is that we are positioned at one of the file ranges for each
|
||||||
|
// higher level than the ones involved in this compaction (i.e. for
|
||||||
|
// all L >= level_ + 2).
|
||||||
|
std::vector<size_t> level_ptrs_;
|
||||||
|
|
||||||
|
// mark (or clear) all files that are being compacted
|
||||||
|
void MarkFilesBeingCompacted(bool);
|
||||||
|
|
||||||
|
// Initialize whether compaction producing files at the bottommost level
|
||||||
|
void SetupBottomMostLevel(bool isManual);
|
||||||
|
|
||||||
|
// In case of compaction error, reset the nextIndex that is used
|
||||||
|
// to pick up the next file to be compacted from files_by_size_
|
||||||
|
void ResetNextCompactionIndex();
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
847
db/compaction_picker.cc
Normal file
847
db/compaction_picker.cc
Normal file
@ -0,0 +1,847 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "db/compaction_picker.h"
|
||||||
|
#include "util/statistics.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
||||||
|
uint64_t sum = 0;
|
||||||
|
for (size_t i = 0; i < files.size() && files[i]; i++) {
|
||||||
|
sum += files[i]->file_size;
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
// Stores the options and comparator, creates one in-progress compaction
// container per configured level, and fills the per-level size tables via
// Init().
CompactionPicker::CompactionPicker(const Options* options,
                                   const InternalKeyComparator* icmp)
    : compactions_in_progress_(options->num_levels),
      options_(options),
      num_levels_(options->num_levels),
      icmp_(icmp) {
  // Populate max_file_size_ and level_max_bytes_ for num_levels_ levels.
  Init();
}
|
||||||
|
|
||||||
|
// Switches the picker to "new_levels" levels and rebuilds the per-level
// size tables to match.
void CompactionPicker::ReduceNumberOfLevels(int new_levels) {
  num_levels_ = new_levels;
  // The tables are sized by the level count, so they must be recomputed.
  Init();
}
|
||||||
|
|
||||||
|
void CompactionPicker::Init() {
|
||||||
|
max_file_size_.reset(new uint64_t[NumberLevels()]);
|
||||||
|
level_max_bytes_.reset(new uint64_t[NumberLevels()]);
|
||||||
|
int target_file_size_multiplier = options_->target_file_size_multiplier;
|
||||||
|
int max_bytes_multiplier = options_->max_bytes_for_level_multiplier;
|
||||||
|
for (int i = 0; i < NumberLevels(); i++) {
|
||||||
|
if (i == 0 && options_->compaction_style == kCompactionStyleUniversal) {
|
||||||
|
max_file_size_[i] = ULLONG_MAX;
|
||||||
|
level_max_bytes_[i] = options_->max_bytes_for_level_base;
|
||||||
|
} else if (i > 1) {
|
||||||
|
max_file_size_[i] = max_file_size_[i - 1] * target_file_size_multiplier;
|
||||||
|
level_max_bytes_[i] =
|
||||||
|
level_max_bytes_[i - 1] * max_bytes_multiplier *
|
||||||
|
options_->max_bytes_for_level_multiplier_additional[i - 1];
|
||||||
|
} else {
|
||||||
|
max_file_size_[i] = options_->target_file_size_base;
|
||||||
|
level_max_bytes_[i] = options_->max_bytes_for_level_base;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nothing to release explicitly here; the body is intentionally empty.
CompactionPicker::~CompactionPicker() {
}
|
||||||
|
|
||||||
|
void CompactionPicker::SizeBeingCompacted(std::vector<uint64_t>& sizes) {
|
||||||
|
for (int level = 0; level < NumberLevels() - 1; level++) {
|
||||||
|
uint64_t total = 0;
|
||||||
|
for (auto c : compactions_in_progress_[level]) {
|
||||||
|
assert(c->level() == level);
|
||||||
|
for (int i = 0; i < c->num_input_files(0); i++) {
|
||||||
|
total += c->input(0,i)->file_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sizes[level] = total;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear all files to indicate that they are not being compacted
|
||||||
|
// Delete this compaction from the list of running compactions.
|
||||||
|
void CompactionPicker::ReleaseCompactionFiles(Compaction* c, Status status) {
|
||||||
|
c->MarkFilesBeingCompacted(false);
|
||||||
|
compactions_in_progress_[c->level()].erase(c);
|
||||||
|
if (!status.ok()) {
|
||||||
|
c->ResetNextCompactionIndex();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t CompactionPicker::MaxFileSizeForLevel(int level) const {
|
||||||
|
assert(level >= 0);
|
||||||
|
assert(level < NumberLevels());
|
||||||
|
return max_file_size_[level];
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t CompactionPicker::MaxGrandParentOverlapBytes(int level) {
|
||||||
|
uint64_t result = MaxFileSizeForLevel(level);
|
||||||
|
result *= options_->max_grandparent_overlap_factor;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
double CompactionPicker::MaxBytesForLevel(int level) {
|
||||||
|
// Note: the result for level zero is not really used since we set
|
||||||
|
// the level-0 compaction threshold based on number of files.
|
||||||
|
assert(level >= 0);
|
||||||
|
assert(level < NumberLevels());
|
||||||
|
return level_max_bytes_[level];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Computes the smallest and largest internal keys covered by the files in
// "inputs" (compared with icmp_) and stores them in *smallest / *largest.
// "inputs" is expected to be non-empty; both outputs are cleared first.
void CompactionPicker::GetRange(const std::vector<FileMetaData*>& inputs,
                                InternalKey* smallest, InternalKey* largest) {
  assert(!inputs.empty());
  smallest->Clear();
  largest->Clear();

  bool is_first = true;
  for (FileMetaData* f : inputs) {
    if (is_first) {
      // The first file seeds both bounds.
      *smallest = f->smallest;
      *largest = f->largest;
      is_first = false;
      continue;
    }
    // Widen the range whenever this file extends past a current bound.
    if (icmp_->Compare(f->smallest, *smallest) < 0) {
      *smallest = f->smallest;
    }
    if (icmp_->Compare(f->largest, *largest) > 0) {
      *largest = f->largest;
    }
  }
}
|
||||||
|
|
||||||
|
// Computes the key range covered by the union of `inputs1` and `inputs2` by
// concatenating both lists and delegating to the single-list overload.
void CompactionPicker::GetRange(const std::vector<FileMetaData*>& inputs1,
                                const std::vector<FileMetaData*>& inputs2,
                                InternalKey* smallest, InternalKey* largest) {
  std::vector<FileMetaData*> combined;
  combined.reserve(inputs1.size() + inputs2.size());
  combined.insert(combined.end(), inputs1.begin(), inputs1.end());
  combined.insert(combined.end(), inputs2.begin(), inputs2.end());
  GetRange(combined, smallest, largest);
}
|
||||||
|
|
||||||
|
// Grows c->inputs_[0] until re-querying the input level for the covered key
// range no longer yields more files.
//
// Returns true when the compaction may proceed. This includes the trivial
// cases of a null compaction, empty inputs and a level-0 compaction.
// Returns false, after clearing both input lists, when the expanded inputs
// or the overlapping parent-level files are already being compacted.
bool CompactionPicker::ExpandWhileOverlapping(Compaction* c) {
  // Nothing to expand.
  if (c == nullptr || c->inputs_[0].empty()) {
    return true;
  }

  // GetOverlappingInputs will always do the right thing for level-0,
  // so no expansion is needed there.
  const int level = c->level();
  if (level == 0) {
    return true;
  }

  InternalKey smallest, largest;

  // Keep expanding c->inputs_[0] until there is a "clean cut" boundary
  // between the files in the input and the surrounding files, so that no
  // parts of a key are lost during compaction.
  int hint_index = -1;
  for (;;) {
    const size_t size_before = c->inputs_[0].size();
    GetRange(c->inputs_[0], &smallest, &largest);
    c->inputs_[0].clear();
    c->input_version_->GetOverlappingInputs(level, &smallest, &largest,
                                            &c->inputs_[0], hint_index,
                                            &hint_index);
    if (c->inputs_[0].size() <= size_before) {
      break;  // the file set stopped growing
    }
  }

  // Range of the final input set.
  GetRange(c->inputs_[0], &smallest, &largest);

  // If the expansion pulled in files that are already under compaction, the
  // whole compaction has to be dropped. The parent level is only consulted
  // when the output goes to a different level.
  bool conflict = FilesInCompaction(c->inputs_[0]);
  if (!conflict && c->level() != c->output_level()) {
    int parent_index = -1;
    conflict = ParentRangeInCompaction(c->input_version_, &smallest, &largest,
                                       level, &parent_index);
  }
  if (conflict) {
    c->inputs_[0].clear();
    c->inputs_[1].clear();
    return false;
  }
  return true;
}
|
||||||
|
|
||||||
|
uint64_t CompactionPicker::ExpandedCompactionByteSizeLimit(int level) {
|
||||||
|
uint64_t result = MaxFileSizeForLevel(level);
|
||||||
|
result *= options_->expanded_compaction_factor;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true if any one of specified files are being compacted
|
||||||
|
// Reports whether at least one file in `files` has its being_compacted flag
// set. An empty list yields false.
bool CompactionPicker::FilesInCompaction(std::vector<FileMetaData*>& files) {
  for (FileMetaData* file : files) {
    if (file->being_compacted) {
      return true;
    }
  }
  return false;
}
|
||||||
|
|
||||||
|
// Returns true if any one of the parent files are being compacted
|
||||||
|
// Reports whether any file at "level+1" overlapping [*smallest, *largest] is
// currently being compacted. *parent_index is passed to
// Version::GetOverlappingInputs both as the search hint and as the output
// index. Requires level + 1 < NumberLevels().
bool CompactionPicker::ParentRangeInCompaction(Version* version,
                                               const InternalKey* smallest,
                                               const InternalKey* largest,
                                               int level, int* parent_index) {
  const int parent_level = level + 1;
  assert(parent_level < NumberLevels());

  std::vector<FileMetaData*> parent_files;
  version->GetOverlappingInputs(parent_level, smallest, largest, &parent_files,
                                *parent_index, parent_index);
  return FilesInCompaction(parent_files);
}
|
||||||
|
|
||||||
|
// Populates the set of inputs from "level+1" that overlap with "level".
|
||||||
|
// Will also attempt to expand "level" if that doesn't expand "level+1"
|
||||||
|
// or cause "level" to include a file for compaction that has an overlapping
|
||||||
|
// user-key with another file.
|
||||||
|
// Fills c->inputs_[1] with the "level+1" files overlapping c->inputs_[0],
// opportunistically widens c->inputs_[0] when that does not pull in more
// "level+1" files, and records the overlapping "level+2" files in
// c->grandparents_. Does nothing when there are no inputs or when the
// compaction outputs to its own input level.
void CompactionPicker::SetupOtherInputs(Compaction* c) {
  // Without inputs there is nothing to expand, and when input and output
  // levels coincide no "level+1" files need to be considered.
  const bool no_inputs = c->inputs_[0].empty();
  if (no_inputs || c->level() == c->output_level()) {
    return;
  }

  const int level = c->level();
  const int parent_level = level + 1;
  InternalKey smallest, largest;

  // Range of the level inputs.
  GetRange(c->inputs_[0], &smallest, &largest);

  // Next-level files (inputs_[1]) that take part in the compaction.
  c->input_version_->GetOverlappingInputs(parent_level, &smallest, &largest,
                                          &c->inputs_[1], c->parent_index_,
                                          &c->parent_index_);

  // Entire range covered by the compaction.
  InternalKey all_start, all_limit;
  GetRange(c->inputs_[0], c->inputs_[1], &all_start, &all_limit);

  // Try to grow the number of inputs in "level" without changing the number
  // of "level+1" files picked up. The expansion is NOT done if it would make
  // "level" include some entries of a user key while excluding others of the
  // same user key, which can happen when one user key spans multiple files.
  if (!c->inputs_[1].empty()) {
    std::vector<FileMetaData*> expanded0;
    c->input_version_->GetOverlappingInputs(level, &all_start, &all_limit,
                                            &expanded0, c->base_index_,
                                            nullptr);
    const uint64_t inputs0_size = TotalFileSize(c->inputs_[0]);
    const uint64_t inputs1_size = TotalFileSize(c->inputs_[1]);
    const uint64_t expanded0_size = TotalFileSize(expanded0);
    const uint64_t limit = ExpandedCompactionByteSizeLimit(level);

    const bool expansion_candidate =
        expanded0.size() > c->inputs_[0].size() &&
        inputs1_size + expanded0_size < limit &&
        !FilesInCompaction(expanded0) &&
        !c->input_version_->HasOverlappingUserKey(&expanded0, level);

    if (expansion_candidate) {
      InternalKey new_start, new_limit;
      GetRange(expanded0, &new_start, &new_limit);

      std::vector<FileMetaData*> expanded1;
      c->input_version_->GetOverlappingInputs(parent_level, &new_start,
                                              &new_limit, &expanded1,
                                              c->parent_index_,
                                              &c->parent_index_);

      // Accept the wider input set only if the parent set kept its size and
      // none of the parent files is busy.
      const bool same_parent_count =
          expanded1.size() == c->inputs_[1].size();
      if (same_parent_count && !FilesInCompaction(expanded1)) {
        Log(options_->info_log,
            "Expanding@%lu %lu+%lu (%lu+%lu bytes) to %lu+%lu (%lu+%lu bytes)"
            "\n",
            (unsigned long)level,
            (unsigned long)(c->inputs_[0].size()),
            (unsigned long)(c->inputs_[1].size()),
            (unsigned long)inputs0_size,
            (unsigned long)inputs1_size,
            (unsigned long)(expanded0.size()),
            (unsigned long)(expanded1.size()),
            (unsigned long)expanded0_size,
            (unsigned long)inputs1_size);
        smallest = new_start;
        largest = new_limit;
        c->inputs_[0] = expanded0;
        c->inputs_[1] = expanded1;
        GetRange(c->inputs_[0], c->inputs_[1], &all_start, &all_limit);
      }
    }
  }

  // Grandparent files overlapping this compaction
  // (parent == level+1; grandparent == level+2).
  const int grandparent_level = level + 2;
  if (grandparent_level < NumberLevels()) {
    c->input_version_->GetOverlappingInputs(grandparent_level, &all_start,
                                            &all_limit, &c->grandparents_);
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Builds a manual compaction of [begin, end] from `input_level` into
// `output_level`.
//
// Returns nullptr when no file of `input_level` overlaps the range or when
// the inputs could not be expanded to a clean cut. When the compaction was
// truncated to bound its size, **compaction_end receives the smallest key of
// the first file left out; when the whole range is covered, *compaction_end
// is set to nullptr. The caller therefore has to pass a *compaction_end that
// points to a valid InternalKey.
Compaction* CompactionPicker::CompactRange(Version* version, int input_level,
                                           int output_level,
                                           const InternalKey* begin,
                                           const InternalKey* end,
                                           InternalKey** compaction_end) {
  // All files are 'overlapping' in universal style compaction, so the entire
  // range has to be compacted in one shot.
  if (options_->compaction_style == kCompactionStyleUniversal) {
    begin = nullptr;
    end = nullptr;
  }

  std::vector<FileMetaData*> inputs;
  version->GetOverlappingInputs(input_level, begin, end, &inputs);
  if (inputs.empty()) {
    return nullptr;
  }

  bool covering_the_whole_range = true;

  // Avoid compacting too much in one shot in case the range is large.
  // This cannot be done for level-0, since level-0 files can overlap and we
  // must not pick one file and drop another, older file that overlaps it.
  if (input_level > 0) {
    const uint64_t limit =
        MaxFileSizeForLevel(input_level) * options_->source_compaction_factor;
    uint64_t total = 0;
    // `taken` counts the files accumulated so far; the last file is never
    // examined because there would be nothing left to cut off after it.
    size_t taken = 0;
    while (taken + 1 < inputs.size()) {
      total += inputs[taken]->file_size;
      ++taken;
      if (total >= limit) {
        **compaction_end = inputs[taken]->smallest;
        covering_the_whole_range = false;
        inputs.resize(taken);
        break;
      }
    }
  }

  Compaction* c = new Compaction(version, input_level, output_level,
                                 MaxFileSizeForLevel(output_level),
                                 MaxGrandParentOverlapBytes(input_level));
  c->inputs_[0] = inputs;

  if (!ExpandWhileOverlapping(c)) {
    delete c;
    Log(options_->info_log, "Could not compact due to expansion failure.\n");
    return nullptr;
  }

  SetupOtherInputs(c);

  if (covering_the_whole_range) {
    *compaction_end = nullptr;
  }

  // These files that are to be manually compacted do not trample upon other
  // files because manual compactions are processed when the system has a max
  // of 1 background compaction thread.
  c->MarkFilesBeingCompacted(true);

  // Is this compaction creating a file at the bottommost level?
  c->SetupBottomMostLevel(true);
  return c;
}
|
||||||
|
|
||||||
|
// Picks the next automatic compaction for `version`.
//
// Size-triggered compactions (levels whose compaction score is >= 1, visited
// in the order given by version->compaction_level_) are preferred over the
// seek-triggered compaction of version->file_to_compact_.
//
// Returns a heap-allocated Compaction whose files are marked as being
// compacted and which is registered in compactions_in_progress_, or nullptr
// when nothing can be compacted right now. The caller owns the result.
Compaction* LevelCompactionPicker::PickCompaction(Version* version) {
  Compaction* c = nullptr;
  int level = -1;

  // Compute the compactions needed. It is better to do it here
  // and also in LogAndApply(), otherwise the values could be stale.
  std::vector<uint64_t> size_being_compacted(NumberLevels() - 1);
  SizeBeingCompacted(size_being_compacted);
  version->Finalize(size_being_compacted);

  // We prefer compactions triggered by too much data in a level over
  // the compactions triggered by seeks.
  //
  // Find the compactions by size on all levels.
  for (int i = 0; i < NumberLevels() - 1; i++) {
    assert(i == 0 ||
           version->compaction_score_[i] <= version->compaction_score_[i - 1]);
    level = version->compaction_level_[i];
    if ((version->compaction_score_[i] >= 1)) {
      c = PickCompactionBySize(version, level, version->compaction_score_[i]);
      // ExpandWhileOverlapping() reports success for a null compaction, so
      // the "nothing picked" case has to be tested explicitly; otherwise the
      // loop would stop here instead of trying the next level.
      if (c == nullptr || ExpandWhileOverlapping(c) == false) {
        delete c;
        c = nullptr;
      } else {
        break;
      }
    }
  }

  // Find compactions needed by seeks
  FileMetaData* f = version->file_to_compact_;
  if (c == nullptr && f != nullptr && !f->being_compacted) {

    level = version->file_to_compact_level_;
    int parent_index = -1;

    // Only allow one level 0 compaction at a time.
    // Do not pick this file if its parents at level+1 are being compacted.
    if (level != 0 || compactions_in_progress_[0].empty()) {
      if (!ParentRangeInCompaction(version, &f->smallest, &f->largest, level,
                                   &parent_index)) {
        c = new Compaction(version, level, level + 1,
                           MaxFileSizeForLevel(level + 1),
                           MaxGrandParentOverlapBytes(level), true);
        c->inputs_[0].push_back(f);
        c->parent_index_ = parent_index;
        c->input_version_->file_to_compact_ = nullptr;
        if (ExpandWhileOverlapping(c) == false) {
          // Release the compaction allocated above, as every other failure
          // path in this file does, instead of leaking it.
          delete c;
          return nullptr;
        }
      }
    }
  }

  if (c == nullptr) {
    return nullptr;
  }

  // Two level 0 compaction won't run at the same time, so don't need to worry
  // about files on level 0 being compacted.
  if (level == 0) {
    assert(compactions_in_progress_[0].empty());
    InternalKey smallest, largest;
    GetRange(c->inputs_[0], &smallest, &largest);
    // Note that the next call will discard the file we placed in
    // c->inputs_[0] earlier and replace it with an overlapping set
    // which will include the picked file.
    c->inputs_[0].clear();
    c->input_version_->GetOverlappingInputs(0, &smallest, &largest,
                                            &c->inputs_[0]);

    // If we include more L0 files in the same compaction run it can
    // cause the 'smallest' and 'largest' key to get extended to a
    // larger range. So, re-invoke GetRange to get the new key range
    GetRange(c->inputs_[0], &smallest, &largest);
    if (ParentRangeInCompaction(c->input_version_, &smallest, &largest, level,
                                &c->parent_index_)) {
      delete c;
      return nullptr;
    }
    assert(!c->inputs_[0].empty());
  }

  // Setup "level+1" files (inputs_[1])
  SetupOtherInputs(c);

  // mark all the files that are being compacted
  c->MarkFilesBeingCompacted(true);

  // Is this compaction creating a file at the bottommost level
  c->SetupBottomMostLevel(false);

  // remember this currently undergoing compaction
  compactions_in_progress_[level].insert(c);

  return c;
}
|
||||||
|
|
||||||
|
// Picks one input file of `level` for a size-triggered compaction into
// level + 1. Walks version->files_by_size_[level] starting at
// next_file_to_compact_by_size_[level] and takes the first file that is
// neither being compacted itself nor overlapped by a busy "level+1" file.
//
// Returns a heap-allocated Compaction holding that single file, or nullptr
// when no file qualifies or a level-0 compaction is already running. The
// position of the first file found not being compacted (or -1) is stored
// back into version->next_file_to_compact_by_size_[level].
Compaction* LevelCompactionPicker::PickCompactionBySize(Version* version,
                                                        int level,
                                                        double score) {
  // Level 0 files are overlapping, so no more than one concurrent compaction
  // can be picked at this level. This could be made better by looking at the
  // key-ranges that are being compacted at level 0.
  if (level == 0 && compactions_in_progress_[level].size() == 1) {
    return nullptr;
  }

  assert(level >= 0);
  assert(level + 1 < NumberLevels());
  Compaction* c = new Compaction(version, level, level + 1,
                                 MaxFileSizeForLevel(level + 1),
                                 MaxGrandParentOverlapBytes(level));
  c->score_ = score;

  // Candidate order: largest file first.
  std::vector<int>& by_size = c->input_version_->files_by_size_[level];

  // Position of the first file that is not yet being compacted.
  int first_idle_pos = -1;

  for (unsigned int pos =
           c->input_version_->next_file_to_compact_by_size_[level];
       pos < by_size.size(); pos++) {
    const int file_index = by_size[pos];
    FileMetaData* candidate = c->input_version_->files_[level][file_index];

    // Verify that the files are arranged in descending size.
    assert((pos == by_size.size() - 1) ||
           (pos >= Version::number_of_files_to_sort_ - 1) ||
           (candidate->file_size >=
            c->input_version_->files_[level][by_size[pos + 1]]->file_size));

    // Skip a file that is already being compacted from level n-1.
    if (candidate->being_compacted) {
      continue;
    }

    // Remember where the next call to PickCompaction should start.
    if (first_idle_pos == -1) {
      first_idle_pos = pos;
    }

    // Do not pick this file if its parents at level+1 are being compacted.
    // Maybe this work need not be redone in SetupOtherInputs.
    int parent_index = -1;
    if (ParentRangeInCompaction(c->input_version_, &candidate->smallest,
                                &candidate->largest, level, &parent_index)) {
      continue;
    }

    c->inputs_[0].push_back(candidate);
    c->base_index_ = file_index;
    c->parent_index_ = parent_index;
    break;
  }

  if (c->inputs_[0].empty()) {
    delete c;
    c = nullptr;
  }

  // Store where to start the iteration in the next call to PickCompaction.
  version->next_file_to_compact_by_size_[level] = first_idle_pos;

  return c;
}
|
||||||
|
|
||||||
|
// Universal style of compaction. Pick files that are contiguous in
|
||||||
|
// time-range to compact.
|
||||||
|
//
|
||||||
|
// Picks the next universal-style compaction for `version`.
//
// Candidates are tried in this order: size-amplification reduction, then
// read-amplification reduction honoring the configured size ratio, then a
// forced read-amplification compaction that ignores size ratios.
//
// Returns a heap-allocated Compaction whose files are marked as being
// compacted and which is registered in compactions_in_progress_, or nullptr
// when there is nothing to do. The caller owns the result.
Compaction* UniversalCompactionPicker::PickCompaction(Version* version) {
  int level = 0;
  double score = version->compaction_score_[0];

  if ((version->files_[level].size() <
       (unsigned int)options_->level0_file_num_compaction_trigger)) {
    Log(options_->info_log, "Universal: nothing to do\n");
    return nullptr;
  }
  Version::FileSummaryStorage tmp;
  // size() yields a size_t; cast it to match the %lu conversion, as the
  // other log calls in this file do.
  Log(options_->info_log, "Universal: candidate files(%lu): %s\n",
      (unsigned long)version->files_[level].size(),
      version->LevelFileSummary(&tmp, 0));

  // Check for size amplification first.
  Compaction* c = PickCompactionUniversalSizeAmp(version, score);
  if (c == nullptr) {

    // Size amplification is within limits. Try reducing read
    // amplification while maintaining file size ratios.
    unsigned int ratio = options_->compaction_options_universal.size_ratio;
    c = PickCompactionUniversalReadAmp(version, score, ratio, UINT_MAX);

    // Size amplification and file size ratios are within configured limits.
    // If max read amplification is exceeding configured limits, then force
    // compaction without looking at filesize ratios and try to reduce
    // the number of files to fewer than level0_file_num_compaction_trigger.
    if (c == nullptr) {
      unsigned int num_files = version->files_[level].size() -
                               options_->level0_file_num_compaction_trigger;
      c = PickCompactionUniversalReadAmp(version, score, UINT_MAX, num_files);
    }
  }
  if (c == nullptr) {
    return nullptr;
  }
  assert(c->inputs_[0].size() > 1);

  // validate that all the chosen files are non overlapping in time
  FileMetaData* newerfile __attribute__((unused)) = nullptr;
  for (unsigned int i = 0; i < c->inputs_[0].size(); i++) {
    FileMetaData* f = c->inputs_[0][i];
    assert (f->smallest_seqno <= f->largest_seqno);
    assert(newerfile == nullptr ||
           newerfile->smallest_seqno > f->largest_seqno);
    newerfile = f;
  }

  // The files are sorted from newest first to oldest last.
  std::vector<int>& file_by_time = c->input_version_->files_by_size_[level];

  // Is the earliest file part of this compaction?
  int last_index = file_by_time[file_by_time.size()-1];
  FileMetaData* last_file = c->input_version_->files_[level][last_index];
  if (c->inputs_[0][c->inputs_[0].size()-1] == last_file) {
    c->bottommost_level_ = true;
  }

  // update statistics
  MeasureTime(options_->statistics.get(), NUM_FILES_IN_SINGLE_COMPACTION,
              c->inputs_[0].size());

  // mark all the files that are being compacted
  c->MarkFilesBeingCompacted(true);

  // remember this currently undergoing compaction
  compactions_in_progress_[level].insert(c);

  // Record whether this compaction includes all sst files.
  // For now, it is only relevant in universal compaction mode.
  c->is_full_compaction_ =
      (c->inputs_[0].size() == c->input_version_->files_[0].size());

  return c;
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Consider compaction files based on their size differences with
|
||||||
|
// the next file in time order.
|
||||||
|
//
|
||||||
|
// Looks for a run of consecutive level-0 files (newest first) worth merging
// to reduce read amplification. A run starts at the first file not being
// compacted. It is extended while the next file is idle, the run holds
// fewer than min(max_merge_width, max_number_of_files_to_compact) files, and
// the accumulated size, increased by `ratio` percent, is not smaller than
// the next file's size.
//
// Returns a heap-allocated Compaction over the first run of at least
// max(min_merge_width, 2) files, or nullptr when no such run exists.
Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
    Version* version, double score, unsigned int ratio,
    unsigned int max_number_of_files_to_compact) {
  const int level = 0;

  unsigned int min_merge_width =
      options_->compaction_options_universal.min_merge_width;
  const unsigned int max_merge_width =
      options_->compaction_options_universal.max_merge_width;

  // The files are sorted from newest first to oldest last.
  std::vector<int>& file_by_time = version->files_by_size_[level];
  assert(file_by_time.size() == version->files_[level].size());

  const unsigned int max_files_to_compact =
      std::min(max_merge_width, max_number_of_files_to_compact);
  min_merge_width = std::max(min_merge_width, 2U);

  bool done = false;
  int start_index = 0;
  unsigned int candidate_count = 0;

  // Considers a candidate file only if it is smaller than the
  // total size accumulated so far.
  for (unsigned int loop = 0; loop < file_by_time.size(); loop++) {
    candidate_count = 0;

    // Skip files that are already being compacted.
    FileMetaData* head = nullptr;
    while (loop < file_by_time.size()) {
      FileMetaData* probe = version->files_[level][file_by_time[loop]];
      if (!probe->being_compacted) {
        head = probe;
        candidate_count = 1;
        break;
      }
      Log(options_->info_log,
          "Universal: file %lu[%d] being compacted, skipping",
          (unsigned long)probe->number, loop);
      loop++;
    }

    // `head` (if any) is not being compacted; it becomes the first
    // candidate of the run.
    uint64_t candidate_size = 0;
    if (head != nullptr) {
      candidate_size = head->file_size;
      Log(options_->info_log, "Universal: Possible candidate file %lu[%d].",
          (unsigned long)head->number, loop);
    }

    // Check if the succeeding files need compaction.
    for (unsigned int i = loop + 1;
         candidate_count < max_files_to_compact && i < file_by_time.size();
         i++) {
      FileMetaData* next = version->files_[level][file_by_time[i]];
      if (next->being_compacted) {
        break;
      }
      // Pick files if the total candidate file size (increased by the
      // specified ratio) is still larger than the next candidate file.
      const uint64_t sz = (candidate_size * (100L + ratio)) / 100;
      if (sz < next->file_size) {
        break;
      }
      candidate_count++;
      candidate_size += next->file_size;
    }

    // Found a series of consecutive files that need compaction.
    if (candidate_count >= (unsigned int)min_merge_width) {
      start_index = loop;
      done = true;
      break;
    }

    // Run too short: log the files that are passed over.
    for (unsigned int i = loop;
         i < loop + candidate_count && i < file_by_time.size(); i++) {
      FileMetaData* skipped = version->files_[level][file_by_time[i]];
      Log(options_->info_log,
          "Universal: Skipping file %lu[%d] with size %lu %d\n",
          (unsigned long)skipped->number,
          i,
          (unsigned long)skipped->file_size,
          skipped->being_compacted);
    }
  }
  if (!done || candidate_count <= 1) {
    return nullptr;
  }

  const unsigned int first_index_after = start_index + candidate_count;

  // Compression is enabled if files compacted earlier already reached
  // size ratio of compression.
  bool enable_compression = true;
  const int ratio_to_compress =
      options_->compaction_options_universal.compression_size_percent;
  if (ratio_to_compress >= 0) {
    const uint64_t total_size = version->NumLevelBytes(level);
    uint64_t older_file_size = 0;
    // Walk from the oldest file back to the first file after the run.
    for (unsigned int i = file_by_time.size() - 1; i >= first_index_after;
         i--) {
      older_file_size += version->files_[level][file_by_time[i]]->file_size;
      if (older_file_size * 100L >= total_size * (long) ratio_to_compress) {
        enable_compression = false;
        break;
      }
    }
  }

  Compaction* c =
      new Compaction(version, level, level, MaxFileSizeForLevel(level),
                     LLONG_MAX, false, enable_compression);
  c->score_ = score;

  for (unsigned int i = start_index; i < first_index_after; i++) {
    FileMetaData* picked = c->input_version_->files_[level][file_by_time[i]];
    c->inputs_[0].push_back(picked);
    Log(options_->info_log, "Universal: Picking file %lu[%d] with size %lu\n",
        (unsigned long)picked->number,
        i,
        (unsigned long)picked->file_size);
  }
  return c;
}
|
||||||
|
|
||||||
|
// Look at overall size amplification. If size amplification
|
||||||
|
// exceeds the configured value, then do a compaction
|
||||||
|
// of the candidate files all the way up to the earliest
|
||||||
|
// base file (overrides configured values of file-size ratios,
|
||||||
|
// min_merge_width and max_merge_width).
|
||||||
|
//
|
||||||
|
// Checks the overall size amplification of level 0: the total size of all
// files newer than the earliest (last) file, compared with the size of that
// earliest file. When newer_total * 100 >= ratio * earliest_size, where
// ratio is compaction_options_universal.max_size_amplification_percent, a
// compaction is requested. It covers every file from the first one not being
// compacted through the earliest file.
//
// Returns a heap-allocated Compaction, or nullptr when there are no files,
// no candidate, a candidate that is already being compacted, or the
// amplification is within the limit.
Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
    Version* version, double score) {
  int level = 0;

  // percentage flexibility while reducing size amplification
  uint64_t ratio = options_->compaction_options_universal.
                     max_size_amplification_percent;

  // The files are sorted from newest first to oldest last.
  std::vector<int>& file_by_time = version->files_by_size_[level];
  assert(file_by_time.size() == version->files_[level].size());

  // Every loop bound below is "size() - 1", which wraps around for an empty
  // list (unsigned arithmetic) and would index out of bounds.
  if (file_by_time.empty()) {
    return nullptr;
  }

  unsigned int candidate_count = 0;
  uint64_t candidate_size = 0;
  unsigned int start_index = 0;
  FileMetaData* f = nullptr;

  // Skip files that are already being compacted
  for (unsigned int loop = 0; loop < file_by_time.size() - 1; loop++) {
    int index = file_by_time[loop];
    f = version->files_[level][index];
    if (!f->being_compacted) {
      start_index = loop;         // Consider this as the first candidate.
      break;
    }
    Log(options_->info_log, "Universal: skipping file %lu[%d] compacted %s",
        (unsigned long)f->number,
        loop,
        " cannot be a candidate to reduce size amp.\n");
    f = nullptr;
  }
  if (f == nullptr) {
    return nullptr;             // no candidate files
  }

  Log(options_->info_log, "Universal: First candidate file %lu[%d] %s",
      (unsigned long)f->number,
      start_index,
      " to reduce size amp.\n");

  // keep adding up all the remaining files
  for (unsigned int loop = start_index; loop < file_by_time.size() - 1;
       loop++) {
    int index = file_by_time[loop];
    f = version->files_[level][index];
    if (f->being_compacted) {
      Log(options_->info_log,
          "Universal: Possible candidate file %lu[%d] %s.",
          (unsigned long)f->number,
          loop,
          " is already being compacted. No size amp reduction possible.\n");
      return nullptr;
    }
    candidate_size += f->file_size;
    candidate_count++;
  }
  if (candidate_count == 0) {
    return nullptr;
  }

  // size of earliest file
  int index = file_by_time[file_by_time.size() - 1];
  uint64_t earliest_file_size = version->files_[level][index]->file_size;

  // size amplification = percentage of additional size
  if (candidate_size * 100 < ratio * earliest_file_size) {
    Log(options_->info_log,
        "Universal: size amp not needed. newer-files-total-size %lu "
        "earliest-file-size %lu",
        (unsigned long)candidate_size,
        (unsigned long)earliest_file_size);
    return nullptr;
  } else {
    Log(options_->info_log,
        "Universal: size amp needed. newer-files-total-size %lu "
        "earliest-file-size %lu",
        (unsigned long)candidate_size,
        (unsigned long)earliest_file_size);
  }
  // start_index is unsigned, so only the upper bound needs checking.
  assert(start_index < file_by_time.size() - 1);

  // create a compaction request
  // We always compact all the files, so always compress.
  Compaction* c =
      new Compaction(version, level, level, MaxFileSizeForLevel(level),
                     LLONG_MAX, false, true);
  c->score_ = score;
  for (unsigned int loop = start_index; loop < file_by_time.size(); loop++) {
    int index = file_by_time[loop];
    f = c->input_version_->files_[level][index];
    c->inputs_[0].push_back(f);
    Log(options_->info_log,
        "Universal: size amp picking file %lu[%d] with size %lu",
        (unsigned long)f->number,
        index,
        (unsigned long)f->file_size);
  }
  return c;
}
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
162
db/compaction_picker.h
Normal file
162
db/compaction_picker.h
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "db/version_set.h"
|
||||||
|
#include "db/compaction.h"
|
||||||
|
#include "rocksdb/status.h"
|
||||||
|
#include "rocksdb/options.h"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
class Compaction;
|
||||||
|
class Version;
|
||||||
|
|
||||||
|
class CompactionPicker {
|
||||||
|
public:
|
||||||
|
CompactionPicker(const Options* options, const InternalKeyComparator* icmp);
|
||||||
|
virtual ~CompactionPicker();
|
||||||
|
|
||||||
|
// See VersionSet::ReduceNumberOfLevels()
|
||||||
|
void ReduceNumberOfLevels(int new_levels);
|
||||||
|
|
||||||
|
// Pick level and inputs for a new compaction.
|
||||||
|
// Returns nullptr if there is no compaction to be done.
|
||||||
|
// Otherwise returns a pointer to a heap-allocated object that
|
||||||
|
// describes the compaction. Caller should delete the result.
|
||||||
|
virtual Compaction* PickCompaction(Version* version) = 0;
|
||||||
|
|
||||||
|
// Return a compaction object for compacting the range [begin,end] in
|
||||||
|
// the specified level. Returns nullptr if there is nothing in that
|
||||||
|
// level that overlaps the specified range. Caller should delete
|
||||||
|
// the result.
|
||||||
|
//
|
||||||
|
// The returned Compaction might not include the whole requested range.
|
||||||
|
// In that case, compaction_end will be set to the next key that needs
|
||||||
|
// compacting. In case the compaction will compact the whole range,
|
||||||
|
// compaction_end will be set to nullptr.
|
||||||
|
// Client is responsible for compaction_end storage -- when called,
|
||||||
|
// *compaction_end should point to valid InternalKey!
|
||||||
|
Compaction* CompactRange(Version* version, int input_level, int output_level,
|
||||||
|
const InternalKey* begin, const InternalKey* end,
|
||||||
|
InternalKey** compaction_end);
|
||||||
|
|
||||||
|
// Free up the files that participated in a compaction
|
||||||
|
void ReleaseCompactionFiles(Compaction* c, Status status);
|
||||||
|
|
||||||
|
// Return the total amount of data that is undergoing
|
||||||
|
// compactions per level
|
||||||
|
void SizeBeingCompacted(std::vector<uint64_t>& sizes);
|
||||||
|
|
||||||
|
// Returns maximum total overlap bytes with grandparent
|
||||||
|
// level (i.e., level+2) before we stop building a single
|
||||||
|
// file in level->level+1 compaction.
|
||||||
|
uint64_t MaxGrandParentOverlapBytes(int level);
|
||||||
|
|
||||||
|
// Returns maximum total bytes of data on a given level.
|
||||||
|
double MaxBytesForLevel(int level);
|
||||||
|
|
||||||
|
// Get the max file size in a given level.
|
||||||
|
uint64_t MaxFileSizeForLevel(int level) const;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
int NumberLevels() const { return num_levels_; }
|
||||||
|
|
||||||
|
// Stores the minimal range that covers all entries in inputs in
|
||||||
|
// *smallest, *largest.
|
||||||
|
// REQUIRES: inputs is not empty
|
||||||
|
void GetRange(const std::vector<FileMetaData*>& inputs, InternalKey* smallest,
|
||||||
|
InternalKey* largest);
|
||||||
|
|
||||||
|
// Stores the minimal range that covers all entries in inputs1 and inputs2
|
||||||
|
// in *smallest, *largest.
|
||||||
|
// REQUIRES: inputs is not empty
|
||||||
|
void GetRange(const std::vector<FileMetaData*>& inputs1,
|
||||||
|
const std::vector<FileMetaData*>& inputs2,
|
||||||
|
InternalKey* smallest, InternalKey* largest);
|
||||||
|
|
||||||
|
// Add more files to the inputs on "level" to make sure that
|
||||||
|
// no newer version of a key is compacted to "level+1" while leaving an older
|
||||||
|
// version in a "level". Otherwise, any Get() will search "level" first,
|
||||||
|
// and will likely return an old/stale value for the key, since it always
|
||||||
|
// searches in increasing order of level to find the value. This could
|
||||||
|
// also scramble the order of merge operands. This function should be
|
||||||
|
// called any time a new Compaction is created, and its inputs_[0] are
|
||||||
|
// populated.
|
||||||
|
//
|
||||||
|
// Will return false if it is impossible to apply this compaction.
|
||||||
|
bool ExpandWhileOverlapping(Compaction* c);
|
||||||
|
|
||||||
|
uint64_t ExpandedCompactionByteSizeLimit(int level);
|
||||||
|
|
||||||
|
// Returns true if any one of the specified files is being compacted
|
||||||
|
bool FilesInCompaction(std::vector<FileMetaData*>& files);
|
||||||
|
|
||||||
|
// Returns true if any one of the parent files is being compacted
|
||||||
|
bool ParentRangeInCompaction(Version* version, const InternalKey* smallest,
|
||||||
|
const InternalKey* largest, int level,
|
||||||
|
int* index);
|
||||||
|
|
||||||
|
void SetupOtherInputs(Compaction* c);
|
||||||
|
|
||||||
|
// record all the ongoing compactions for all levels
|
||||||
|
std::vector<std::set<Compaction*>> compactions_in_progress_;
|
||||||
|
|
||||||
|
// Per-level target file size.
|
||||||
|
std::unique_ptr<uint64_t[]> max_file_size_;
|
||||||
|
|
||||||
|
// Per-level max bytes
|
||||||
|
std::unique_ptr<uint64_t[]> level_max_bytes_;
|
||||||
|
|
||||||
|
const Options* const options_;
|
||||||
|
private:
|
||||||
|
void Init();
|
||||||
|
|
||||||
|
int num_levels_;
|
||||||
|
|
||||||
|
const InternalKeyComparator* const icmp_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class UniversalCompactionPicker : public CompactionPicker {
|
||||||
|
public:
|
||||||
|
UniversalCompactionPicker(const Options* options,
|
||||||
|
const InternalKeyComparator* icmp)
|
||||||
|
: CompactionPicker(options, icmp) {}
|
||||||
|
virtual Compaction* PickCompaction(Version* version) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Pick Universal compaction to limit read amplification
|
||||||
|
Compaction* PickCompactionUniversalReadAmp(Version* version, double score,
|
||||||
|
unsigned int ratio,
|
||||||
|
unsigned int num_files);
|
||||||
|
|
||||||
|
// Pick Universal compaction to limit space amplification.
|
||||||
|
Compaction* PickCompactionUniversalSizeAmp(Version* version, double score);
|
||||||
|
};
|
||||||
|
|
||||||
|
class LevelCompactionPicker : public CompactionPicker {
|
||||||
|
public:
|
||||||
|
LevelCompactionPicker(const Options* options,
|
||||||
|
const InternalKeyComparator* icmp)
|
||||||
|
: CompactionPicker(options, icmp) {}
|
||||||
|
virtual Compaction* PickCompaction(Version* version) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// For the specified level, pick a compaction.
|
||||||
|
// Returns nullptr if there is no compaction to be done.
|
||||||
|
// If level is 0 and there is already a compaction on that level, this
|
||||||
|
// function will return nullptr.
|
||||||
|
Compaction* PickCompactionBySize(Version* version, int level, double score);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -14,7 +14,7 @@
|
|||||||
#include <gflags/gflags.h>
|
#include <gflags/gflags.h>
|
||||||
#include "db/db_impl.h"
|
#include "db/db_impl.h"
|
||||||
#include "db/version_set.h"
|
#include "db/version_set.h"
|
||||||
#include "db/db_statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
#include "rocksdb/options.h"
|
#include "rocksdb/options.h"
|
||||||
#include "rocksdb/cache.h"
|
#include "rocksdb/cache.h"
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
@ -30,6 +30,7 @@
|
|||||||
#include "util/random.h"
|
#include "util/random.h"
|
||||||
#include "util/stack_trace.h"
|
#include "util/stack_trace.h"
|
||||||
#include "util/string_util.h"
|
#include "util/string_util.h"
|
||||||
|
#include "util/statistics.h"
|
||||||
#include "util/testutil.h"
|
#include "util/testutil.h"
|
||||||
#include "hdfs/env_hdfs.h"
|
#include "hdfs/env_hdfs.h"
|
||||||
#include "utilities/merge_operators.h"
|
#include "utilities/merge_operators.h"
|
||||||
@ -355,9 +356,9 @@ static bool ValidateCompressionLevel(const char* flagname, int32_t value) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const bool FLAGS_compression_level_dummy =
|
static const bool FLAGS_compression_level_dummy __attribute__((unused)) =
|
||||||
google::RegisterFlagValidator(&FLAGS_compression_level,
|
google::RegisterFlagValidator(&FLAGS_compression_level,
|
||||||
&ValidateCompressionLevel);
|
&ValidateCompressionLevel);
|
||||||
|
|
||||||
DEFINE_int32(min_level_to_compress, -1, "If non-negative, compression starts"
|
DEFINE_int32(min_level_to_compress, -1, "If non-negative, compression starts"
|
||||||
" from this level. Levels with number < min_level_to_compress are"
|
" from this level. Levels with number < min_level_to_compress are"
|
||||||
|
@ -74,7 +74,7 @@ Status DBImpl::GetLiveFiles(std::vector<std::string>& ret,
|
|||||||
|
|
||||||
// Make a set of all of the live *.sst files
|
// Make a set of all of the live *.sst files
|
||||||
std::set<uint64_t> live;
|
std::set<uint64_t> live;
|
||||||
versions_->AddLiveFilesCurrentVersion(&live);
|
versions_->current()->AddLiveFiles(&live);
|
||||||
|
|
||||||
ret.clear();
|
ret.clear();
|
||||||
ret.reserve(live.size() + 2); //*.sst + CURRENT + MANIFEST
|
ret.reserve(live.size() + 2); //*.sst + CURRENT + MANIFEST
|
||||||
|
293
db/db_impl.cc
293
db/db_impl.cc
@ -57,6 +57,7 @@
|
|||||||
#include "util/mutexlock.h"
|
#include "util/mutexlock.h"
|
||||||
#include "util/perf_context_imp.h"
|
#include "util/perf_context_imp.h"
|
||||||
#include "util/stop_watch.h"
|
#include "util/stop_watch.h"
|
||||||
|
#include "util/autovector.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
@ -254,8 +255,8 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
|||||||
: env_(options.env),
|
: env_(options.env),
|
||||||
dbname_(dbname),
|
dbname_(dbname),
|
||||||
internal_comparator_(options.comparator),
|
internal_comparator_(options.comparator),
|
||||||
options_(SanitizeOptions(
|
options_(SanitizeOptions(dbname, &internal_comparator_,
|
||||||
dbname, &internal_comparator_, &internal_filter_policy_, options)),
|
&internal_filter_policy_, options)),
|
||||||
internal_filter_policy_(options.filter_policy),
|
internal_filter_policy_(options.filter_policy),
|
||||||
owns_info_log_(options_.info_log != options.info_log),
|
owns_info_log_(options_.info_log != options.info_log),
|
||||||
db_lock_(nullptr),
|
db_lock_(nullptr),
|
||||||
@ -263,8 +264,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
|||||||
shutting_down_(nullptr),
|
shutting_down_(nullptr),
|
||||||
bg_cv_(&mutex_),
|
bg_cv_(&mutex_),
|
||||||
mem_rep_factory_(options_.memtable_factory.get()),
|
mem_rep_factory_(options_.memtable_factory.get()),
|
||||||
mem_(new MemTable(internal_comparator_, mem_rep_factory_,
|
mem_(new MemTable(internal_comparator_, options_)),
|
||||||
NumberLevels(), options_)),
|
|
||||||
logfile_number_(0),
|
logfile_number_(0),
|
||||||
super_version_(nullptr),
|
super_version_(nullptr),
|
||||||
tmp_batch_(),
|
tmp_batch_(),
|
||||||
@ -410,7 +410,7 @@ uint64_t DBImpl::TEST_Current_Manifest_FileNo() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::NewDB() {
|
Status DBImpl::NewDB() {
|
||||||
VersionEdit new_db(NumberLevels());
|
VersionEdit new_db;
|
||||||
new_db.SetComparatorName(user_comparator()->Name());
|
new_db.SetComparatorName(user_comparator()->Name());
|
||||||
new_db.SetLogNumber(0);
|
new_db.SetLogNumber(0);
|
||||||
new_db.SetNextFile(2);
|
new_db.SetNextFile(2);
|
||||||
@ -1048,8 +1048,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
|
|||||||
WriteBatchInternal::SetContents(&batch, record);
|
WriteBatchInternal::SetContents(&batch, record);
|
||||||
|
|
||||||
if (mem == nullptr) {
|
if (mem == nullptr) {
|
||||||
mem = new MemTable(internal_comparator_, mem_rep_factory_,
|
mem = new MemTable(internal_comparator_, options_);
|
||||||
NumberLevels(), options_);
|
|
||||||
mem->Ref();
|
mem->Ref();
|
||||||
}
|
}
|
||||||
status = WriteBatchInternal::InsertInto(&batch, mem, &options_);
|
status = WriteBatchInternal::InsertInto(&batch, mem, &options_);
|
||||||
@ -1300,6 +1299,7 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
|
|||||||
void DBImpl::CompactRange(const ColumnFamilyHandle& column_family,
|
void DBImpl::CompactRange(const ColumnFamilyHandle& column_family,
|
||||||
const Slice* begin, const Slice* end,
|
const Slice* begin, const Slice* end,
|
||||||
bool reduce_level, int target_level) {
|
bool reduce_level, int target_level) {
|
||||||
|
FlushMemTable(FlushOptions());
|
||||||
int max_level_with_files = 1;
|
int max_level_with_files = 1;
|
||||||
{
|
{
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
@ -1310,9 +1310,15 @@ void DBImpl::CompactRange(const ColumnFamilyHandle& column_family,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TEST_FlushMemTable(); // TODO(sanjay): Skip if memtable does not overlap
|
for (int level = 0; level <= max_level_with_files; level++) {
|
||||||
for (int level = 0; level < max_level_with_files; level++) {
|
// in case the compaction is universal or if we're compacting the
|
||||||
TEST_CompactRange(level, begin, end);
|
// bottom-most level, the output level will be the same as input one
|
||||||
|
if (options_.compaction_style == kCompactionStyleUniversal ||
|
||||||
|
level == max_level_with_files) {
|
||||||
|
RunManualCompaction(level, level, begin, end);
|
||||||
|
} else {
|
||||||
|
RunManualCompaction(level, level + 1, begin, end);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reduce_level) {
|
if (reduce_level) {
|
||||||
@ -1324,13 +1330,13 @@ void DBImpl::CompactRange(const ColumnFamilyHandle& column_family,
|
|||||||
// return the same level if it cannot be moved
|
// return the same level if it cannot be moved
|
||||||
int DBImpl::FindMinimumEmptyLevelFitting(int level) {
|
int DBImpl::FindMinimumEmptyLevelFitting(int level) {
|
||||||
mutex_.AssertHeld();
|
mutex_.AssertHeld();
|
||||||
|
Version* current = versions_->current();
|
||||||
int minimum_level = level;
|
int minimum_level = level;
|
||||||
for (int i = level - 1; i > 0; --i) {
|
for (int i = level - 1; i > 0; --i) {
|
||||||
// stop if level i is not empty
|
// stop if level i is not empty
|
||||||
if (versions_->NumLevelFiles(i) > 0) break;
|
if (current->NumLevelFiles(i) > 0) break;
|
||||||
|
|
||||||
// stop if level i is too small (cannot fit the level files)
|
// stop if level i is too small (cannot fit the level files)
|
||||||
if (versions_->MaxBytesForLevel(i) < versions_->NumLevelBytes(level)) break;
|
if (versions_->MaxBytesForLevel(i) < current->NumLevelBytes(level)) break;
|
||||||
|
|
||||||
minimum_level = i;
|
minimum_level = i;
|
||||||
}
|
}
|
||||||
@ -1376,7 +1382,7 @@ void DBImpl::ReFitLevel(int level, int target_level) {
|
|||||||
Log(options_.info_log, "Before refitting:\n%s",
|
Log(options_.info_log, "Before refitting:\n%s",
|
||||||
versions_->current()->DebugString().data());
|
versions_->current()->DebugString().data());
|
||||||
|
|
||||||
VersionEdit edit(NumberLevels());
|
VersionEdit edit;
|
||||||
for (const auto& f : versions_->current()->files_[level]) {
|
for (const auto& f : versions_->current()->files_[level]) {
|
||||||
edit.DeleteFile(level, f->number);
|
edit.DeleteFile(level, f->number);
|
||||||
edit.AddFile(to_level, f->number, f->file_size, f->smallest, f->largest,
|
edit.AddFile(to_level, f->number, f->file_size, f->smallest, f->largest,
|
||||||
@ -1612,13 +1618,17 @@ Status DBImpl::AppendSortedWalsOfType(const std::string& path,
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) {
|
void DBImpl::RunManualCompaction(int input_level,
|
||||||
assert(level >= 0);
|
int output_level,
|
||||||
|
const Slice* begin,
|
||||||
|
const Slice* end) {
|
||||||
|
assert(input_level >= 0);
|
||||||
|
|
||||||
InternalKey begin_storage, end_storage;
|
InternalKey begin_storage, end_storage;
|
||||||
|
|
||||||
ManualCompaction manual;
|
ManualCompaction manual;
|
||||||
manual.level = level;
|
manual.input_level = input_level;
|
||||||
|
manual.output_level = output_level;
|
||||||
manual.done = false;
|
manual.done = false;
|
||||||
manual.in_progress = false;
|
manual.in_progress = false;
|
||||||
// For universal compaction, we enforce every manual compaction to compact
|
// For universal compaction, we enforce every manual compaction to compact
|
||||||
@ -1646,11 +1656,11 @@ void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) {
|
|||||||
// can compact any range of keys/files.
|
// can compact any range of keys/files.
|
||||||
//
|
//
|
||||||
// bg_manual_only_ is non-zero when at least one thread is inside
|
// bg_manual_only_ is non-zero when at least one thread is inside
|
||||||
// TEST_CompactRange(), i.e. during that time no other compaction will
|
// RunManualCompaction(), i.e. during that time no other compaction will
|
||||||
// get scheduled (see MaybeScheduleFlushOrCompaction).
|
// get scheduled (see MaybeScheduleFlushOrCompaction).
|
||||||
//
|
//
|
||||||
// Note that the following loop doesn't stop more than one thread calling
|
// Note that the following loop doesn't stop more than one thread calling
|
||||||
// TEST_CompactRange() from getting to the second while loop below.
|
// RunManualCompaction() from getting to the second while loop below.
|
||||||
// However, only one of them will actually schedule compaction, while
|
// However, only one of them will actually schedule compaction, while
|
||||||
// others will wait on a condition variable until it completes.
|
// others will wait on a condition variable until it completes.
|
||||||
|
|
||||||
@ -1680,6 +1690,15 @@ void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) {
|
|||||||
--bg_manual_only_;
|
--bg_manual_only_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DBImpl::TEST_CompactRange(int level,
|
||||||
|
const Slice* begin,
|
||||||
|
const Slice* end) {
|
||||||
|
int output_level = (options_.compaction_style == kCompactionStyleUniversal)
|
||||||
|
? level
|
||||||
|
: level + 1;
|
||||||
|
RunManualCompaction(level, output_level, begin, end);
|
||||||
|
}
|
||||||
|
|
||||||
Status DBImpl::FlushMemTable(const FlushOptions& options) {
|
Status DBImpl::FlushMemTable(const FlushOptions& options) {
|
||||||
// nullptr batch means just wait for earlier writes to be done
|
// nullptr batch means just wait for earlier writes to be done
|
||||||
Status s = Write(WriteOptions(), nullptr);
|
Status s = Write(WriteOptions(), nullptr);
|
||||||
@ -1825,6 +1844,11 @@ void DBImpl::TEST_PurgeObsoleteteWAL() {
|
|||||||
PurgeObsoleteWALFiles();
|
PurgeObsoleteWALFiles();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t DBImpl::TEST_GetLevel0TotalSize() {
|
||||||
|
MutexLock l(&mutex_);
|
||||||
|
return versions_->current()->NumLevelBytes(0);
|
||||||
|
}
|
||||||
|
|
||||||
void DBImpl::BackgroundCallCompaction() {
|
void DBImpl::BackgroundCallCompaction() {
|
||||||
bool madeProgress = false;
|
bool madeProgress = false;
|
||||||
DeletionState deletion_state(options_.max_write_buffer_number, true);
|
DeletionState deletion_state(options_.max_write_buffer_number, true);
|
||||||
@ -1899,23 +1923,27 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
|
|||||||
unique_ptr<Compaction> c;
|
unique_ptr<Compaction> c;
|
||||||
bool is_manual = (manual_compaction_ != nullptr) &&
|
bool is_manual = (manual_compaction_ != nullptr) &&
|
||||||
(manual_compaction_->in_progress == false);
|
(manual_compaction_->in_progress == false);
|
||||||
InternalKey manual_end;
|
InternalKey manual_end_storage;
|
||||||
|
InternalKey* manual_end = &manual_end_storage;
|
||||||
if (is_manual) {
|
if (is_manual) {
|
||||||
ManualCompaction* m = manual_compaction_;
|
ManualCompaction* m = manual_compaction_;
|
||||||
assert(!m->in_progress);
|
assert(!m->in_progress);
|
||||||
m->in_progress = true; // another thread cannot pick up the same work
|
m->in_progress = true; // another thread cannot pick up the same work
|
||||||
c.reset(versions_->CompactRange(m->level, m->begin, m->end));
|
c.reset(versions_->CompactRange(
|
||||||
if (c) {
|
m->input_level, m->output_level, m->begin, m->end, &manual_end));
|
||||||
manual_end = c->input(0, c->num_input_files(0) - 1)->largest;
|
if (!c) {
|
||||||
} else {
|
|
||||||
m->done = true;
|
m->done = true;
|
||||||
}
|
}
|
||||||
Log(options_.info_log,
|
Log(options_.info_log,
|
||||||
"Manual compaction at level-%d from %s .. %s; will stop at %s\n",
|
"Manual compaction from level-%d to level-%d from %s .. %s; will stop "
|
||||||
m->level,
|
"at %s\n",
|
||||||
|
m->input_level,
|
||||||
|
m->output_level,
|
||||||
(m->begin ? m->begin->DebugString().c_str() : "(begin)"),
|
(m->begin ? m->begin->DebugString().c_str() : "(begin)"),
|
||||||
(m->end ? m->end->DebugString().c_str() : "(end)"),
|
(m->end ? m->end->DebugString().c_str() : "(end)"),
|
||||||
(m->done ? "(end)" : manual_end.DebugString().c_str()));
|
((m->done || manual_end == nullptr)
|
||||||
|
? "(end)"
|
||||||
|
: manual_end->DebugString().c_str()));
|
||||||
} else if (!options_.disable_auto_compactions) {
|
} else if (!options_.disable_auto_compactions) {
|
||||||
c.reset(versions_->PickCompaction());
|
c.reset(versions_->PickCompaction());
|
||||||
}
|
}
|
||||||
@ -1934,13 +1962,11 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
|
|||||||
f->smallest_seqno, f->largest_seqno);
|
f->smallest_seqno, f->largest_seqno);
|
||||||
status = versions_->LogAndApply(c->edit(), &mutex_);
|
status = versions_->LogAndApply(c->edit(), &mutex_);
|
||||||
InstallSuperVersion(deletion_state);
|
InstallSuperVersion(deletion_state);
|
||||||
VersionSet::LevelSummaryStorage tmp;
|
Version::LevelSummaryStorage tmp;
|
||||||
Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
|
Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
|
||||||
static_cast<unsigned long long>(f->number),
|
static_cast<unsigned long long>(f->number), c->level() + 1,
|
||||||
c->level() + 1,
|
|
||||||
static_cast<unsigned long long>(f->file_size),
|
static_cast<unsigned long long>(f->file_size),
|
||||||
status.ToString().c_str(),
|
status.ToString().c_str(), versions_->current()->LevelSummary(&tmp));
|
||||||
versions_->LevelSummary(&tmp));
|
|
||||||
versions_->ReleaseCompactionFiles(c.get(), status);
|
versions_->ReleaseCompactionFiles(c.get(), status);
|
||||||
*madeProgress = true;
|
*madeProgress = true;
|
||||||
} else {
|
} else {
|
||||||
@ -1980,13 +2006,19 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
|
|||||||
// Also note that, if we don't stop here, then the current compaction
|
// Also note that, if we don't stop here, then the current compaction
|
||||||
// writes a new file back to level 0, which will be used in successive
|
// writes a new file back to level 0, which will be used in successive
|
||||||
// compaction. Hence the manual compaction will never finish.
|
// compaction. Hence the manual compaction will never finish.
|
||||||
if (options_.compaction_style == kCompactionStyleUniversal) {
|
//
|
||||||
|
// Stop the compaction if manual_end points to nullptr -- this means
|
||||||
|
// that we compacted the whole range. manual_end should always point
|
||||||
|
// to nullptr in case of universal compaction
|
||||||
|
if (manual_end == nullptr) {
|
||||||
m->done = true;
|
m->done = true;
|
||||||
}
|
}
|
||||||
if (!m->done) {
|
if (!m->done) {
|
||||||
// We only compacted part of the requested range. Update *m
|
// We only compacted part of the requested range. Update *m
|
||||||
// to the range that is left to be compacted.
|
// to the range that is left to be compacted.
|
||||||
m->tmp_storage = manual_end;
|
// Universal compaction should always compact the whole range
|
||||||
|
assert(options_.compaction_style != kCompactionStyleUniversal);
|
||||||
|
m->tmp_storage = *manual_end;
|
||||||
m->begin = &m->tmp_storage;
|
m->begin = &m->tmp_storage;
|
||||||
}
|
}
|
||||||
m->in_progress = false; // not being processed anymore
|
m->in_progress = false; // not being processed anymore
|
||||||
@ -2018,14 +2050,14 @@ void DBImpl::CleanupCompaction(CompactionState* compact, Status status) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Allocate the file numbers for the output file. We allocate as
|
// Allocate the file numbers for the output file. We allocate as
|
||||||
// many output file numbers as there are files in level+1.
|
// many output file numbers as there are files in level+1 (at least one).
|
||||||
// Insert them into pending_outputs so that they do not get deleted.
|
// Insert them into pending_outputs so that they do not get deleted.
|
||||||
void DBImpl::AllocateCompactionOutputFileNumbers(CompactionState* compact) {
|
void DBImpl::AllocateCompactionOutputFileNumbers(CompactionState* compact) {
|
||||||
mutex_.AssertHeld();
|
mutex_.AssertHeld();
|
||||||
assert(compact != nullptr);
|
assert(compact != nullptr);
|
||||||
assert(compact->builder == nullptr);
|
assert(compact->builder == nullptr);
|
||||||
int filesNeeded = compact->compaction->num_input_files(1);
|
int filesNeeded = compact->compaction->num_input_files(1);
|
||||||
for (int i = 0; i < filesNeeded; i++) {
|
for (int i = 0; i < std::max(filesNeeded, 1); i++) {
|
||||||
uint64_t file_number = versions_->NewFileNumber();
|
uint64_t file_number = versions_->NewFileNumber();
|
||||||
pending_outputs_.insert(file_number);
|
pending_outputs_.insert(file_number);
|
||||||
compact->allocated_file_numbers.push_back(file_number);
|
compact->allocated_file_numbers.push_back(file_number);
|
||||||
@ -2169,14 +2201,11 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
|
|||||||
|
|
||||||
// Add compaction outputs
|
// Add compaction outputs
|
||||||
compact->compaction->AddInputDeletions(compact->compaction->edit());
|
compact->compaction->AddInputDeletions(compact->compaction->edit());
|
||||||
const int level = compact->compaction->level();
|
|
||||||
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
||||||
const CompactionState::Output& out = compact->outputs[i];
|
const CompactionState::Output& out = compact->outputs[i];
|
||||||
compact->compaction->edit()->AddFile(
|
compact->compaction->edit()->AddFile(
|
||||||
(options_.compaction_style == kCompactionStyleUniversal) ?
|
compact->compaction->output_level(), out.number, out.file_size,
|
||||||
level : level + 1,
|
out.smallest, out.largest, out.smallest_seqno, out.largest_seqno);
|
||||||
out.number, out.file_size, out.smallest, out.largest,
|
|
||||||
out.smallest_seqno, out.largest_seqno);
|
|
||||||
}
|
}
|
||||||
return versions_->LogAndApply(compact->compaction->edit(), &mutex_);
|
return versions_->LogAndApply(compact->compaction->edit(), &mutex_);
|
||||||
}
|
}
|
||||||
@ -2218,14 +2247,14 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
|
|||||||
compact->compaction->num_input_files(0),
|
compact->compaction->num_input_files(0),
|
||||||
compact->compaction->level(),
|
compact->compaction->level(),
|
||||||
compact->compaction->num_input_files(1),
|
compact->compaction->num_input_files(1),
|
||||||
compact->compaction->level() + 1,
|
compact->compaction->output_level(),
|
||||||
compact->compaction->score(),
|
compact->compaction->score(),
|
||||||
options_.max_background_compactions - bg_compaction_scheduled_);
|
options_.max_background_compactions - bg_compaction_scheduled_);
|
||||||
char scratch[256];
|
char scratch[256];
|
||||||
compact->compaction->Summary(scratch, sizeof(scratch));
|
compact->compaction->Summary(scratch, sizeof(scratch));
|
||||||
Log(options_.info_log, "Compaction start summary: %s\n", scratch);
|
Log(options_.info_log, "Compaction start summary: %s\n", scratch);
|
||||||
|
|
||||||
assert(versions_->NumLevelFiles(compact->compaction->level()) > 0);
|
assert(versions_->current()->NumLevelFiles(compact->compaction->level()) > 0);
|
||||||
assert(compact->builder == nullptr);
|
assert(compact->builder == nullptr);
|
||||||
assert(!compact->outfile);
|
assert(!compact->outfile);
|
||||||
|
|
||||||
@ -2553,9 +2582,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
|
|||||||
|
|
||||||
CompactionStats stats;
|
CompactionStats stats;
|
||||||
stats.micros = env_->NowMicros() - start_micros - imm_micros;
|
stats.micros = env_->NowMicros() - start_micros - imm_micros;
|
||||||
if (options_.statistics.get()) {
|
MeasureTime(options_.statistics.get(), COMPACTION_TIME, stats.micros);
|
||||||
options_.statistics.get()->measureTime(COMPACTION_TIME, stats.micros);
|
|
||||||
}
|
|
||||||
stats.files_in_leveln = compact->compaction->num_input_files(0);
|
stats.files_in_leveln = compact->compaction->num_input_files(0);
|
||||||
stats.files_in_levelnp1 = compact->compaction->num_input_files(1);
|
stats.files_in_levelnp1 = compact->compaction->num_input_files(1);
|
||||||
|
|
||||||
@ -2597,22 +2624,21 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
|
|||||||
status = InstallCompactionResults(compact);
|
status = InstallCompactionResults(compact);
|
||||||
InstallSuperVersion(deletion_state);
|
InstallSuperVersion(deletion_state);
|
||||||
}
|
}
|
||||||
VersionSet::LevelSummaryStorage tmp;
|
Version::LevelSummaryStorage tmp;
|
||||||
Log(options_.info_log,
|
Log(options_.info_log,
|
||||||
"compacted to: %s, %.1f MB/sec, level %d, files in(%d, %d) out(%d) "
|
"compacted to: %s, %.1f MB/sec, level %d, files in(%d, %d) out(%d) "
|
||||||
"MB in(%.1f, %.1f) out(%.1f), read-write-amplify(%.1f) "
|
"MB in(%.1f, %.1f) out(%.1f), read-write-amplify(%.1f) "
|
||||||
"write-amplify(%.1f) %s\n",
|
"write-amplify(%.1f) %s\n",
|
||||||
versions_->LevelSummary(&tmp),
|
versions_->current()->LevelSummary(&tmp),
|
||||||
(stats.bytes_readn + stats.bytes_readnp1 + stats.bytes_written) /
|
(stats.bytes_readn + stats.bytes_readnp1 + stats.bytes_written) /
|
||||||
(double) stats.micros,
|
(double)stats.micros,
|
||||||
compact->compaction->output_level(),
|
compact->compaction->output_level(), stats.files_in_leveln,
|
||||||
stats.files_in_leveln, stats.files_in_levelnp1, stats.files_out_levelnp1,
|
stats.files_in_levelnp1, stats.files_out_levelnp1,
|
||||||
stats.bytes_readn / 1048576.0,
|
stats.bytes_readn / 1048576.0, stats.bytes_readnp1 / 1048576.0,
|
||||||
stats.bytes_readnp1 / 1048576.0,
|
|
||||||
stats.bytes_written / 1048576.0,
|
stats.bytes_written / 1048576.0,
|
||||||
(stats.bytes_written + stats.bytes_readnp1 + stats.bytes_readn) /
|
(stats.bytes_written + stats.bytes_readnp1 + stats.bytes_readn) /
|
||||||
(double) stats.bytes_readn,
|
(double)stats.bytes_readn,
|
||||||
stats.bytes_written / (double) stats.bytes_readn,
|
stats.bytes_written / (double)stats.bytes_readn,
|
||||||
status.ToString().c_str());
|
status.ToString().c_str());
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
@ -2649,38 +2675,40 @@ static void CleanupIteratorState(void* arg1, void* arg2) {
|
|||||||
Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
|
Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
|
||||||
SequenceNumber* latest_snapshot) {
|
SequenceNumber* latest_snapshot) {
|
||||||
IterState* cleanup = new IterState;
|
IterState* cleanup = new IterState;
|
||||||
mutex_.Lock();
|
MemTable* mutable_mem;
|
||||||
*latest_snapshot = versions_->LastSequence();
|
std::vector<MemTable*> immutables;
|
||||||
|
Version* version;
|
||||||
|
|
||||||
// Collect together all needed child iterators for mem
|
// Collect together all needed child iterators for mem
|
||||||
std::vector<Iterator*> list;
|
mutex_.Lock();
|
||||||
|
*latest_snapshot = versions_->LastSequence();
|
||||||
mem_->Ref();
|
mem_->Ref();
|
||||||
list.push_back(mem_->NewIterator(options));
|
mutable_mem = mem_;
|
||||||
|
|
||||||
cleanup->mem.push_back(mem_);
|
|
||||||
|
|
||||||
// Collect together all needed child iterators for imm_
|
// Collect together all needed child iterators for imm_
|
||||||
std::vector<MemTable*> immutables;
|
|
||||||
imm_.GetMemTables(&immutables);
|
imm_.GetMemTables(&immutables);
|
||||||
for (unsigned int i = 0; i < immutables.size(); i++) {
|
for (unsigned int i = 0; i < immutables.size(); i++) {
|
||||||
MemTable* m = immutables[i];
|
immutables[i]->Ref();
|
||||||
m->Ref();
|
}
|
||||||
|
// Collect iterators for files in L0 - Ln
|
||||||
|
versions_->current()->Ref();
|
||||||
|
version = versions_->current();
|
||||||
|
mutex_.Unlock();
|
||||||
|
|
||||||
|
std::vector<Iterator*> list;
|
||||||
|
list.push_back(mutable_mem->NewIterator(options));
|
||||||
|
cleanup->mem.push_back(mutable_mem);
|
||||||
|
for (MemTable* m : immutables) {
|
||||||
list.push_back(m->NewIterator(options));
|
list.push_back(m->NewIterator(options));
|
||||||
cleanup->mem.push_back(m);
|
cleanup->mem.push_back(m);
|
||||||
}
|
}
|
||||||
|
version->AddIterators(options, storage_options_, &list);
|
||||||
// Collect iterators for files in L0 - Ln
|
|
||||||
versions_->current()->AddIterators(options, storage_options_, &list);
|
|
||||||
Iterator* internal_iter =
|
Iterator* internal_iter =
|
||||||
NewMergingIterator(&internal_comparator_, &list[0], list.size());
|
NewMergingIterator(&internal_comparator_, &list[0], list.size());
|
||||||
versions_->current()->Ref();
|
cleanup->version = version;
|
||||||
|
|
||||||
cleanup->mu = &mutex_;
|
cleanup->mu = &mutex_;
|
||||||
cleanup->db = this;
|
cleanup->db = this;
|
||||||
cleanup->version = versions_->current();
|
|
||||||
internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr);
|
internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr);
|
||||||
|
|
||||||
mutex_.Unlock();
|
|
||||||
return internal_iter;
|
return internal_iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2691,7 +2719,7 @@ Iterator* DBImpl::TEST_NewInternalIterator() {
|
|||||||
|
|
||||||
int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
|
int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
return versions_->MaxNextLevelOverlappingBytes();
|
return versions_->current()->MaxNextLevelOverlappingBytes();
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::Get(const ReadOptions& options,
|
Status DBImpl::Get(const ReadOptions& options,
|
||||||
@ -2898,7 +2926,7 @@ std::vector<Status> DBImpl::MultiGet(
|
|||||||
Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options,
|
Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options,
|
||||||
const std::string& column_family_name,
|
const std::string& column_family_name,
|
||||||
ColumnFamilyHandle* handle) {
|
ColumnFamilyHandle* handle) {
|
||||||
VersionEdit edit(0);
|
VersionEdit edit;
|
||||||
edit.AddColumnFamily(column_family_name);
|
edit.AddColumnFamily(column_family_name);
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
++versions_->max_column_family_;
|
++versions_->max_column_family_;
|
||||||
@ -2920,7 +2948,7 @@ Status DBImpl::DropColumnFamily(const ColumnFamilyHandle& column_family) {
|
|||||||
if (column_family.id == 0) {
|
if (column_family.id == 0) {
|
||||||
return Status::InvalidArgument("Can't drop default column family");
|
return Status::InvalidArgument("Can't drop default column family");
|
||||||
}
|
}
|
||||||
VersionEdit edit(0);
|
VersionEdit edit;
|
||||||
edit.DropColumnFamily();
|
edit.DropColumnFamily();
|
||||||
edit.SetColumnFamily(column_family.id);
|
edit.SetColumnFamily(column_family.id);
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
@ -3045,12 +3073,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
|||||||
uint64_t last_sequence = versions_->LastSequence();
|
uint64_t last_sequence = versions_->LastSequence();
|
||||||
Writer* last_writer = &w;
|
Writer* last_writer = &w;
|
||||||
if (status.ok() && my_batch != nullptr) { // nullptr batch is for compactions
|
if (status.ok() && my_batch != nullptr) { // nullptr batch is for compactions
|
||||||
// TODO: BuildBatchGroup physically concatenate/copy all write batches into
|
autovector<WriteBatch*> write_batch_group;
|
||||||
// a new one. Mem copy is done with the lock held. Ideally, we only need
|
BuildBatchGroup(&last_writer, &write_batch_group);
|
||||||
// the lock to obtain the last_writer and the references to all batches.
|
|
||||||
// Creation (copy) of the merged batch could have been done outside of the
|
|
||||||
// lock protected region.
|
|
||||||
WriteBatch* updates = BuildBatchGroup(&last_writer);
|
|
||||||
|
|
||||||
// Add to log and apply to memtable. We can release the lock
|
// Add to log and apply to memtable. We can release the lock
|
||||||
// during this phase since &w is currently responsible for logging
|
// during this phase since &w is currently responsible for logging
|
||||||
@ -3058,6 +3082,16 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
|||||||
// into mem_.
|
// into mem_.
|
||||||
{
|
{
|
||||||
mutex_.Unlock();
|
mutex_.Unlock();
|
||||||
|
WriteBatch* updates = nullptr;
|
||||||
|
if (write_batch_group.size() == 1) {
|
||||||
|
updates = write_batch_group[0];
|
||||||
|
} else {
|
||||||
|
updates = &tmp_batch_;
|
||||||
|
for (size_t i = 0; i < write_batch_group.size(); ++i) {
|
||||||
|
WriteBatchInternal::Append(updates, write_batch_group[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const SequenceNumber current_sequence = last_sequence + 1;
|
const SequenceNumber current_sequence = last_sequence + 1;
|
||||||
WriteBatchInternal::SetSequence(updates, current_sequence);
|
WriteBatchInternal::SetSequence(updates, current_sequence);
|
||||||
int my_batch_count = WriteBatchInternal::Count(updates);
|
int my_batch_count = WriteBatchInternal::Count(updates);
|
||||||
@ -3100,15 +3134,15 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
|||||||
// have succeeded in memtable but Status reports error for all writes.
|
// have succeeded in memtable but Status reports error for all writes.
|
||||||
throw std::runtime_error("In memory WriteBatch corruption!");
|
throw std::runtime_error("In memory WriteBatch corruption!");
|
||||||
}
|
}
|
||||||
SetTickerCount(options_.statistics.get(),
|
SetTickerCount(options_.statistics.get(), SEQUENCE_NUMBER,
|
||||||
SEQUENCE_NUMBER, last_sequence);
|
last_sequence);
|
||||||
}
|
}
|
||||||
|
if (updates == &tmp_batch_) tmp_batch_.Clear();
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
versions_->SetLastSequence(last_sequence);
|
versions_->SetLastSequence(last_sequence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (updates == &tmp_batch_) tmp_batch_.Clear();
|
|
||||||
}
|
}
|
||||||
if (options_.paranoid_checks && !status.ok() && bg_error_.ok()) {
|
if (options_.paranoid_checks && !status.ok() && bg_error_.ok()) {
|
||||||
bg_error_ = status; // stop compaction & fail any further writes
|
bg_error_ = status; // stop compaction & fail any further writes
|
||||||
@ -3136,13 +3170,14 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
|||||||
|
|
||||||
// REQUIRES: Writer list must be non-empty
|
// REQUIRES: Writer list must be non-empty
|
||||||
// REQUIRES: First writer must have a non-nullptr batch
|
// REQUIRES: First writer must have a non-nullptr batch
|
||||||
WriteBatch* DBImpl::BuildBatchGroup(Writer** last_writer) {
|
void DBImpl::BuildBatchGroup(Writer** last_writer,
|
||||||
|
autovector<WriteBatch*>* write_batch_group) {
|
||||||
assert(!writers_.empty());
|
assert(!writers_.empty());
|
||||||
Writer* first = writers_.front();
|
Writer* first = writers_.front();
|
||||||
WriteBatch* result = first->batch;
|
assert(first->batch != nullptr);
|
||||||
assert(result != nullptr);
|
|
||||||
|
|
||||||
size_t size = WriteBatchInternal::ByteSize(first->batch);
|
size_t size = WriteBatchInternal::ByteSize(first->batch);
|
||||||
|
write_batch_group->push_back(first->batch);
|
||||||
|
|
||||||
// Allow the group to grow up to a maximum size, but if the
|
// Allow the group to grow up to a maximum size, but if the
|
||||||
// original write is small, limit the growth so we do not slow
|
// original write is small, limit the growth so we do not slow
|
||||||
@ -3175,18 +3210,10 @@ WriteBatch* DBImpl::BuildBatchGroup(Writer** last_writer) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Append to *reuslt
|
write_batch_group->push_back(w->batch);
|
||||||
if (result == first->batch) {
|
|
||||||
// Switch to temporary batch instead of disturbing caller's batch
|
|
||||||
result = &tmp_batch_;
|
|
||||||
assert(WriteBatchInternal::Count(result) == 0);
|
|
||||||
WriteBatchInternal::Append(result, first->batch);
|
|
||||||
}
|
|
||||||
WriteBatchInternal::Append(result, w->batch);
|
|
||||||
}
|
}
|
||||||
*last_writer = w;
|
*last_writer = w;
|
||||||
}
|
}
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function computes the amount of time in microseconds by which a write
|
// This function computes the amount of time in microseconds by which a write
|
||||||
@ -3200,7 +3227,7 @@ WriteBatch* DBImpl::BuildBatchGroup(Writer** last_writer) {
|
|||||||
// The goal of this formula is to gradually increase the rate at which writes
|
// The goal of this formula is to gradually increase the rate at which writes
|
||||||
// are slowed. We also tried linear delay (r * 1000), but it seemed to do
|
// are slowed. We also tried linear delay (r * 1000), but it seemed to do
|
||||||
// slightly worse. There is no other particular reason for choosing quadratic.
|
// slightly worse. There is no other particular reason for choosing quadratic.
|
||||||
uint64_t DBImpl::SlowdownAmount(int n, int top, int bottom) {
|
uint64_t DBImpl::SlowdownAmount(int n, double bottom, double top) {
|
||||||
uint64_t delay;
|
uint64_t delay;
|
||||||
if (n >= top) {
|
if (n >= top) {
|
||||||
delay = 1000;
|
delay = 1000;
|
||||||
@ -3212,10 +3239,10 @@ uint64_t DBImpl::SlowdownAmount(int n, int top, int bottom) {
|
|||||||
// If we are here, we know that:
|
// If we are here, we know that:
|
||||||
// level0_start_slowdown <= n < level0_slowdown
|
// level0_start_slowdown <= n < level0_slowdown
|
||||||
// since the previous two conditions are false.
|
// since the previous two conditions are false.
|
||||||
float how_much =
|
double how_much =
|
||||||
(float) (n - bottom) /
|
(double) (n - bottom) /
|
||||||
(top - bottom);
|
(top - bottom);
|
||||||
delay = how_much * how_much * 1000;
|
delay = std::max(how_much * how_much * 1000, 100.0);
|
||||||
}
|
}
|
||||||
assert(delay <= 1000);
|
assert(delay <= 1000);
|
||||||
return delay;
|
return delay;
|
||||||
@ -3240,25 +3267,22 @@ Status DBImpl::MakeRoomForWrite(bool force,
|
|||||||
// Yield previous error
|
// Yield previous error
|
||||||
s = bg_error_;
|
s = bg_error_;
|
||||||
break;
|
break;
|
||||||
} else if (
|
} else if (allow_delay && versions_->NeedSlowdownForNumLevel0Files()) {
|
||||||
allow_delay &&
|
|
||||||
versions_->NumLevelFiles(0) >=
|
|
||||||
options_.level0_slowdown_writes_trigger) {
|
|
||||||
// We are getting close to hitting a hard limit on the number of
|
// We are getting close to hitting a hard limit on the number of
|
||||||
// L0 files. Rather than delaying a single write by several
|
// L0 files. Rather than delaying a single write by several
|
||||||
// seconds when we hit the hard limit, start delaying each
|
// seconds when we hit the hard limit, start delaying each
|
||||||
// individual write by 0-1ms to reduce latency variance. Also,
|
// individual write by 0-1ms to reduce latency variance. Also,
|
||||||
// this delay hands over some CPU to the compaction thread in
|
// this delay hands over some CPU to the compaction thread in
|
||||||
// case it is sharing the same core as the writer.
|
// case it is sharing the same core as the writer.
|
||||||
|
uint64_t slowdown =
|
||||||
|
SlowdownAmount(versions_->current()->NumLevelFiles(0),
|
||||||
|
options_.level0_slowdown_writes_trigger,
|
||||||
|
options_.level0_stop_writes_trigger);
|
||||||
mutex_.Unlock();
|
mutex_.Unlock();
|
||||||
uint64_t delayed;
|
uint64_t delayed;
|
||||||
{
|
{
|
||||||
StopWatch sw(env_, options_.statistics.get(), STALL_L0_SLOWDOWN_COUNT);
|
StopWatch sw(env_, options_.statistics.get(), STALL_L0_SLOWDOWN_COUNT);
|
||||||
env_->SleepForMicroseconds(
|
env_->SleepForMicroseconds(slowdown);
|
||||||
SlowdownAmount(versions_->NumLevelFiles(0),
|
|
||||||
options_.level0_slowdown_writes_trigger,
|
|
||||||
options_.level0_stop_writes_trigger)
|
|
||||||
);
|
|
||||||
delayed = sw.ElapsedMicros();
|
delayed = sw.ElapsedMicros();
|
||||||
}
|
}
|
||||||
RecordTick(options_.statistics.get(), STALL_L0_SLOWDOWN_MICROS, delayed);
|
RecordTick(options_.statistics.get(), STALL_L0_SLOWDOWN_MICROS, delayed);
|
||||||
@ -3290,7 +3314,7 @@ Status DBImpl::MakeRoomForWrite(bool force,
|
|||||||
STALL_MEMTABLE_COMPACTION_MICROS, stall);
|
STALL_MEMTABLE_COMPACTION_MICROS, stall);
|
||||||
stall_memtable_compaction_ += stall;
|
stall_memtable_compaction_ += stall;
|
||||||
stall_memtable_compaction_count_++;
|
stall_memtable_compaction_count_++;
|
||||||
} else if (versions_->NumLevelFiles(0) >=
|
} else if (versions_->current()->NumLevelFiles(0) >=
|
||||||
options_.level0_stop_writes_trigger) {
|
options_.level0_stop_writes_trigger) {
|
||||||
// There are too many level-0 files.
|
// There are too many level-0 files.
|
||||||
DelayLoggingAndReset();
|
DelayLoggingAndReset();
|
||||||
@ -3366,17 +3390,13 @@ Status DBImpl::MakeRoomForWrite(bool force,
|
|||||||
EnvOptions soptions(storage_options_);
|
EnvOptions soptions(storage_options_);
|
||||||
soptions.use_mmap_writes = false;
|
soptions.use_mmap_writes = false;
|
||||||
DelayLoggingAndReset();
|
DelayLoggingAndReset();
|
||||||
s = env_->NewWritableFile(
|
s = env_->NewWritableFile(LogFileName(options_.wal_dir, new_log_number),
|
||||||
LogFileName(options_.wal_dir, new_log_number),
|
&lfile, soptions);
|
||||||
&lfile,
|
|
||||||
soptions
|
|
||||||
);
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
// Our final size should be less than write_buffer_size
|
// Our final size should be less than write_buffer_size
|
||||||
// (compression, etc) but err on the side of caution.
|
// (compression, etc) but err on the side of caution.
|
||||||
lfile->SetPreallocationBlockSize(1.1 * options_.write_buffer_size);
|
lfile->SetPreallocationBlockSize(1.1 * options_.write_buffer_size);
|
||||||
memtmp = new MemTable(
|
memtmp = new MemTable(internal_comparator_, options_);
|
||||||
internal_comparator_, mem_rep_factory_, NumberLevels(), options_);
|
|
||||||
new_superversion = new SuperVersion(options_.max_write_buffer_number);
|
new_superversion = new SuperVersion(options_.max_write_buffer_number);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3426,6 +3446,7 @@ bool DBImpl::GetProperty(const ColumnFamilyHandle& column_family,
|
|||||||
value->clear();
|
value->clear();
|
||||||
|
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
|
Version* current = versions_->current();
|
||||||
Slice in = property;
|
Slice in = property;
|
||||||
Slice prefix("rocksdb.");
|
Slice prefix("rocksdb.");
|
||||||
if (!in.starts_with(prefix)) return false;
|
if (!in.starts_with(prefix)) return false;
|
||||||
@ -3440,7 +3461,7 @@ bool DBImpl::GetProperty(const ColumnFamilyHandle& column_family,
|
|||||||
} else {
|
} else {
|
||||||
char buf[100];
|
char buf[100];
|
||||||
snprintf(buf, sizeof(buf), "%d",
|
snprintf(buf, sizeof(buf), "%d",
|
||||||
versions_->NumLevelFiles(static_cast<int>(level)));
|
current->NumLevelFiles(static_cast<int>(level)));
|
||||||
*value = buf;
|
*value = buf;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -3455,8 +3476,8 @@ bool DBImpl::GetProperty(const ColumnFamilyHandle& column_family,
|
|||||||
snprintf(buf, sizeof(buf),
|
snprintf(buf, sizeof(buf),
|
||||||
"%3d %8d %8.0f\n",
|
"%3d %8d %8.0f\n",
|
||||||
level,
|
level,
|
||||||
versions_->NumLevelFiles(level),
|
current->NumLevelFiles(level),
|
||||||
versions_->NumLevelBytes(level) / 1048576.0);
|
current->NumLevelBytes(level) / 1048576.0);
|
||||||
value->append(buf);
|
value->append(buf);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -3499,8 +3520,8 @@ bool DBImpl::GetProperty(const ColumnFamilyHandle& column_family,
|
|||||||
"--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
|
"--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n"
|
||||||
);
|
);
|
||||||
value->append(buf);
|
value->append(buf);
|
||||||
for (int level = 0; level < NumberLevels(); level++) {
|
for (int level = 0; level < current->NumberLevels(); level++) {
|
||||||
int files = versions_->NumLevelFiles(level);
|
int files = current->NumLevelFiles(level);
|
||||||
if (stats_[level].micros > 0 || files > 0) {
|
if (stats_[level].micros > 0 || files > 0) {
|
||||||
int64_t bytes_read = stats_[level].bytes_readn +
|
int64_t bytes_read = stats_[level].bytes_readn +
|
||||||
stats_[level].bytes_readnp1;
|
stats_[level].bytes_readnp1;
|
||||||
@ -3521,8 +3542,8 @@ bool DBImpl::GetProperty(const ColumnFamilyHandle& column_family,
|
|||||||
"%3d %8d %8.0f %5.1f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %10.1f %9.1f %11.1f %8d %8d %8d %8d %8d %9.1f %9lu\n",
|
"%3d %8d %8.0f %5.1f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %10.1f %9.1f %11.1f %8d %8d %8d %8d %8d %9.1f %9lu\n",
|
||||||
level,
|
level,
|
||||||
files,
|
files,
|
||||||
versions_->NumLevelBytes(level) / 1048576.0,
|
current->NumLevelBytes(level) / 1048576.0,
|
||||||
versions_->NumLevelBytes(level) /
|
current->NumLevelBytes(level) /
|
||||||
versions_->MaxBytesForLevel(level),
|
versions_->MaxBytesForLevel(level),
|
||||||
stats_[level].micros / 1e6,
|
stats_[level].micros / 1e6,
|
||||||
bytes_read / 1048576.0,
|
bytes_read / 1048576.0,
|
||||||
@ -3758,7 +3779,7 @@ Status DBImpl::DeleteFile(std::string name) {
|
|||||||
int level;
|
int level;
|
||||||
FileMetaData metadata;
|
FileMetaData metadata;
|
||||||
int maxlevel = NumberLevels();
|
int maxlevel = NumberLevels();
|
||||||
VersionEdit edit(maxlevel);
|
VersionEdit edit;
|
||||||
DeletionState deletion_state(0, true);
|
DeletionState deletion_state(0, true);
|
||||||
{
|
{
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
@ -3781,7 +3802,7 @@ Status DBImpl::DeleteFile(std::string name) {
|
|||||||
// This is to make sure that any deletion tombstones are not
|
// This is to make sure that any deletion tombstones are not
|
||||||
// lost. Check that the level passed is the last level.
|
// lost. Check that the level passed is the last level.
|
||||||
for (int i = level + 1; i < maxlevel; i++) {
|
for (int i = level + 1; i < maxlevel; i++) {
|
||||||
if (versions_->NumLevelFiles(i) != 0) {
|
if (versions_->current()->NumLevelFiles(i) != 0) {
|
||||||
Log(options_.info_log,
|
Log(options_.info_log,
|
||||||
"DeleteFile %s FAILED. File not in last level\n", name.c_str());
|
"DeleteFile %s FAILED. File not in last level\n", name.c_str());
|
||||||
return Status::InvalidArgument("File not in last level");
|
return Status::InvalidArgument("File not in last level");
|
||||||
@ -3836,7 +3857,10 @@ Status DBImpl::GetDbIdentity(std::string& identity) {
|
|||||||
// can call if they wish
|
// can call if they wish
|
||||||
Status DB::Put(const WriteOptions& opt, const ColumnFamilyHandle& column_family,
|
Status DB::Put(const WriteOptions& opt, const ColumnFamilyHandle& column_family,
|
||||||
const Slice& key, const Slice& value) {
|
const Slice& key, const Slice& value) {
|
||||||
WriteBatch batch;
|
// Pre-allocate size of write batch conservatively.
|
||||||
|
// 8 bytes are taken by header, 4 bytes for count, 1 byte for type,
|
||||||
|
// and we allocate 11 extra bytes for key length, as well as value length.
|
||||||
|
WriteBatch batch(key.size() + value.size() + 24);
|
||||||
batch.Put(column_family.id, key, value);
|
batch.Put(column_family.id, key, value);
|
||||||
return Write(opt, &batch);
|
return Write(opt, &batch);
|
||||||
}
|
}
|
||||||
@ -3915,20 +3939,20 @@ Status DB::OpenWithColumnFamilies(
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
impl->mutex_.Lock();
|
impl->mutex_.Lock();
|
||||||
VersionEdit edit(impl->NumberLevels());
|
VersionEdit edit;
|
||||||
// Handles create_if_missing, error_if_exists
|
// Handles create_if_missing, error_if_exists
|
||||||
s = impl->Recover(&edit, column_families);
|
s = impl->Recover(&edit, column_families);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
uint64_t new_log_number = impl->versions_->NewFileNumber();
|
uint64_t new_log_number = impl->versions_->NewFileNumber();
|
||||||
unique_ptr<WritableFile> lfile;
|
unique_ptr<WritableFile> lfile;
|
||||||
soptions.use_mmap_writes = false;
|
soptions.use_mmap_writes = false;
|
||||||
s = options.env->NewWritableFile(
|
s = impl->options_.env->NewWritableFile(
|
||||||
LogFileName(impl->options_.wal_dir, new_log_number),
|
LogFileName(impl->options_.wal_dir, new_log_number),
|
||||||
&lfile,
|
&lfile,
|
||||||
soptions
|
soptions
|
||||||
);
|
);
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
lfile->SetPreallocationBlockSize(1.1 * options.write_buffer_size);
|
lfile->SetPreallocationBlockSize(1.1 * impl->options_.write_buffer_size);
|
||||||
edit.SetLogNumber(new_log_number);
|
edit.SetLogNumber(new_log_number);
|
||||||
impl->logfile_number_ = new_log_number;
|
impl->logfile_number_ = new_log_number;
|
||||||
impl->log_.reset(new log::Writer(std::move(lfile)));
|
impl->log_.reset(new log::Writer(std::move(lfile)));
|
||||||
@ -3949,12 +3973,11 @@ Status DB::OpenWithColumnFamilies(
|
|||||||
impl->MaybeScheduleLogDBDeployStats();
|
impl->MaybeScheduleLogDBDeployStats();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl->mutex_.Unlock();
|
|
||||||
|
|
||||||
if (s.ok() && options.compaction_style == kCompactionStyleUniversal) {
|
if (s.ok() && impl->options_.compaction_style == kCompactionStyleUniversal) {
|
||||||
int num_files;
|
Version* current = impl->versions_->current();
|
||||||
for (int i = 1; i < impl->NumberLevels(); i++) {
|
for (int i = 1; i < impl->NumberLevels(); i++) {
|
||||||
num_files = impl->versions_->NumLevelFiles(i);
|
int num_files = current->NumLevelFiles(i);
|
||||||
if (num_files > 0) {
|
if (num_files > 0) {
|
||||||
s = Status::InvalidArgument("Not all files are at level 0. Cannot "
|
s = Status::InvalidArgument("Not all files are at level 0. Cannot "
|
||||||
"open with universal compaction style.");
|
"open with universal compaction style.");
|
||||||
@ -3963,6 +3986,8 @@ Status DB::OpenWithColumnFamilies(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl->mutex_.Unlock();
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
*dbptr = impl;
|
*dbptr = impl;
|
||||||
} else {
|
} else {
|
||||||
|
20
db/db_impl.h
20
db/db_impl.h
@ -22,6 +22,7 @@
|
|||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
#include "util/stats_logger.h"
|
#include "util/stats_logger.h"
|
||||||
#include "memtablelist.h"
|
#include "memtablelist.h"
|
||||||
|
#include "util/autovector.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
@ -125,10 +126,17 @@ class DBImpl : public DB {
|
|||||||
|
|
||||||
virtual Status GetDbIdentity(std::string& identity);
|
virtual Status GetDbIdentity(std::string& identity);
|
||||||
|
|
||||||
|
void RunManualCompaction(int input_level,
|
||||||
|
int output_level,
|
||||||
|
const Slice* begin,
|
||||||
|
const Slice* end);
|
||||||
|
|
||||||
// Extra methods (for testing) that are not in the public DB interface
|
// Extra methods (for testing) that are not in the public DB interface
|
||||||
|
|
||||||
// Compact any files in the named level that overlap [*begin, *end]
|
// Compact any files in the named level that overlap [*begin, *end]
|
||||||
void TEST_CompactRange(int level, const Slice* begin, const Slice* end);
|
void TEST_CompactRange(int level,
|
||||||
|
const Slice* begin,
|
||||||
|
const Slice* end);
|
||||||
|
|
||||||
// Force current memtable contents to be flushed.
|
// Force current memtable contents to be flushed.
|
||||||
Status TEST_FlushMemTable();
|
Status TEST_FlushMemTable();
|
||||||
@ -158,7 +166,7 @@ class DBImpl : public DB {
|
|||||||
void TEST_PurgeObsoleteteWAL();
|
void TEST_PurgeObsoleteteWAL();
|
||||||
|
|
||||||
// get total level0 file size. Only for testing.
|
// get total level0 file size. Only for testing.
|
||||||
uint64_t TEST_GetLevel0TotalSize() { return versions_->NumLevelBytes(0);}
|
uint64_t TEST_GetLevel0TotalSize();
|
||||||
|
|
||||||
void TEST_SetDefaultTimeToCheck(uint64_t default_interval_to_delete_obsolete_WAL)
|
void TEST_SetDefaultTimeToCheck(uint64_t default_interval_to_delete_obsolete_WAL)
|
||||||
{
|
{
|
||||||
@ -324,13 +332,14 @@ class DBImpl : public DB {
|
|||||||
Status WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
Status WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
||||||
uint64_t* filenumber);
|
uint64_t* filenumber);
|
||||||
|
|
||||||
uint64_t SlowdownAmount(int n, int top, int bottom);
|
uint64_t SlowdownAmount(int n, double bottom, double top);
|
||||||
// MakeRoomForWrite will return superversion_to_free through an arugment,
|
// MakeRoomForWrite will return superversion_to_free through an arugment,
|
||||||
// which the caller needs to delete. We do it because caller can delete
|
// which the caller needs to delete. We do it because caller can delete
|
||||||
// the superversion outside of mutex
|
// the superversion outside of mutex
|
||||||
Status MakeRoomForWrite(bool force /* compact even if there is room? */,
|
Status MakeRoomForWrite(bool force /* compact even if there is room? */,
|
||||||
SuperVersion** superversion_to_free);
|
SuperVersion** superversion_to_free);
|
||||||
WriteBatch* BuildBatchGroup(Writer** last_writer);
|
void BuildBatchGroup(Writer** last_writer,
|
||||||
|
autovector<WriteBatch*>* write_batch_group);
|
||||||
|
|
||||||
// Force current memtable contents to be flushed.
|
// Force current memtable contents to be flushed.
|
||||||
Status FlushMemTable(const FlushOptions& options);
|
Status FlushMemTable(const FlushOptions& options);
|
||||||
@ -443,7 +452,8 @@ class DBImpl : public DB {
|
|||||||
|
|
||||||
// Information for a manual compaction
|
// Information for a manual compaction
|
||||||
struct ManualCompaction {
|
struct ManualCompaction {
|
||||||
int level;
|
int input_level;
|
||||||
|
int output_level;
|
||||||
bool done;
|
bool done;
|
||||||
bool in_progress; // compaction request being processed?
|
bool in_progress; // compaction request being processed?
|
||||||
const InternalKey* begin; // nullptr means beginning of key range
|
const InternalKey* begin; // nullptr means beginning of key range
|
||||||
|
@ -85,7 +85,7 @@ Status DB::OpenForReadOnly(const Options& options, const std::string& dbname,
|
|||||||
|
|
||||||
DBImplReadOnly* impl = new DBImplReadOnly(options, dbname);
|
DBImplReadOnly* impl = new DBImplReadOnly(options, dbname);
|
||||||
impl->mutex_.Lock();
|
impl->mutex_.Lock();
|
||||||
VersionEdit edit(impl->NumberLevels());
|
VersionEdit edit;
|
||||||
DBOptions db_options(options);
|
DBOptions db_options(options);
|
||||||
ColumnFamilyOptions cf_options(options);
|
ColumnFamilyOptions cf_options(options);
|
||||||
std::vector<ColumnFamilyDescriptor> column_families;
|
std::vector<ColumnFamilyDescriptor> column_families;
|
||||||
|
@ -1,14 +0,0 @@
|
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
|
|
||||||
#include "db/db_statistics.h"
|
|
||||||
|
|
||||||
namespace rocksdb {
|
|
||||||
|
|
||||||
std::shared_ptr<Statistics> CreateDBStatistics() {
|
|
||||||
return std::make_shared<DBStatistics>();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
@ -1,63 +0,0 @@
|
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
//
|
|
||||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style license that can be
|
|
||||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
#include <cassert>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <vector>
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
#include "rocksdb/statistics.h"
|
|
||||||
#include "util/histogram.h"
|
|
||||||
#include "port/port.h"
|
|
||||||
#include "util/mutexlock.h"
|
|
||||||
|
|
||||||
|
|
||||||
namespace rocksdb {
|
|
||||||
|
|
||||||
class DBStatistics: public Statistics {
|
|
||||||
public:
|
|
||||||
DBStatistics() : allTickers_(TICKER_ENUM_MAX),
|
|
||||||
allHistograms_(HISTOGRAM_ENUM_MAX) { }
|
|
||||||
|
|
||||||
virtual ~DBStatistics() {}
|
|
||||||
|
|
||||||
virtual long getTickerCount(Tickers tickerType) {
|
|
||||||
assert(tickerType < TICKER_ENUM_MAX);
|
|
||||||
return allTickers_[tickerType].getCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void setTickerCount(Tickers tickerType, uint64_t count) {
|
|
||||||
assert(tickerType < TICKER_ENUM_MAX);
|
|
||||||
allTickers_[tickerType].setTickerCount(count);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void recordTick(Tickers tickerType, uint64_t count) {
|
|
||||||
assert(tickerType < TICKER_ENUM_MAX);
|
|
||||||
allTickers_[tickerType].recordTick(count);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void measureTime(Histograms histogramType, uint64_t value) {
|
|
||||||
assert(histogramType < HISTOGRAM_ENUM_MAX);
|
|
||||||
allHistograms_[histogramType].Add(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void histogramData(Histograms histogramType,
|
|
||||||
HistogramData * const data) {
|
|
||||||
assert(histogramType < HISTOGRAM_ENUM_MAX);
|
|
||||||
allHistograms_[histogramType].Data(data);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<Ticker> allTickers_;
|
|
||||||
std::vector<HistogramImpl> allHistograms_;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::shared_ptr<Statistics> CreateDBStatistics();
|
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
@ -65,13 +65,14 @@ void DBImpl::LogDBDeployStats() {
|
|||||||
|
|
||||||
uint64_t file_total_size = 0;
|
uint64_t file_total_size = 0;
|
||||||
uint32_t file_total_num = 0;
|
uint32_t file_total_num = 0;
|
||||||
for (int i = 0; i < versions_->NumberLevels(); i++) {
|
Version* current = versions_->current();
|
||||||
file_total_num += versions_->NumLevelFiles(i);
|
for (int i = 0; i < current->NumberLevels(); i++) {
|
||||||
file_total_size += versions_->NumLevelBytes(i);
|
file_total_num += current->NumLevelFiles(i);
|
||||||
|
file_total_size += current->NumLevelBytes(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
VersionSet::LevelSummaryStorage scratch;
|
Version::LevelSummaryStorage scratch;
|
||||||
const char* file_num_summary = versions_->LevelSummary(&scratch);
|
const char* file_num_summary = current->LevelSummary(&scratch);
|
||||||
std::string file_num_per_level(file_num_summary);
|
std::string file_num_per_level(file_num_summary);
|
||||||
std::string data_size_per_level(file_num_summary);
|
std::string data_size_per_level(file_num_summary);
|
||||||
|
|
||||||
|
286
db/db_test.cc
286
db/db_test.cc
@ -17,7 +17,6 @@
|
|||||||
#include "db/filename.h"
|
#include "db/filename.h"
|
||||||
#include "db/version_set.h"
|
#include "db/version_set.h"
|
||||||
#include "db/write_batch_internal.h"
|
#include "db/write_batch_internal.h"
|
||||||
#include "db/db_statistics.h"
|
|
||||||
#include "rocksdb/cache.h"
|
#include "rocksdb/cache.h"
|
||||||
#include "rocksdb/compaction_filter.h"
|
#include "rocksdb/compaction_filter.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
@ -27,6 +26,7 @@
|
|||||||
#include "util/mutexlock.h"
|
#include "util/mutexlock.h"
|
||||||
#include "util/testharness.h"
|
#include "util/testharness.h"
|
||||||
#include "util/testutil.h"
|
#include "util/testutil.h"
|
||||||
|
#include "util/statistics.h"
|
||||||
#include "utilities/merge_operators.h"
|
#include "utilities/merge_operators.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
@ -680,6 +680,10 @@ static std::string Key(int i) {
|
|||||||
return std::string(buf);
|
return std::string(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static long TestGetTickerCount(const Options& options, Tickers ticker_type) {
|
||||||
|
return options.statistics->getTickerCount(ticker_type);
|
||||||
|
}
|
||||||
|
|
||||||
TEST(DBTest, Empty) {
|
TEST(DBTest, Empty) {
|
||||||
do {
|
do {
|
||||||
ASSERT_TRUE(db_ != nullptr);
|
ASSERT_TRUE(db_ != nullptr);
|
||||||
@ -713,14 +717,11 @@ TEST(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
|
|||||||
dbfull()->Flush(FlushOptions());
|
dbfull()->Flush(FlushOptions());
|
||||||
|
|
||||||
// index/filter blocks added to block cache right after table creation.
|
// index/filter blocks added to block cache right after table creation.
|
||||||
ASSERT_EQ(1,
|
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_INDEX_MISS));
|
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
||||||
ASSERT_EQ(1,
|
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
|
|
||||||
ASSERT_EQ(2, /* only index/filter were added */
|
ASSERT_EQ(2, /* only index/filter were added */
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
|
TestGetTickerCount(options, BLOCK_CACHE_ADD));
|
||||||
ASSERT_EQ(0,
|
ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_DATA_MISS));
|
|
||||||
|
|
||||||
// Make sure filter block is in cache.
|
// Make sure filter block is in cache.
|
||||||
std::string value;
|
std::string value;
|
||||||
@ -728,31 +729,24 @@ TEST(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
|
|||||||
db_->KeyMayExist(ReadOptions(), "key", &value);
|
db_->KeyMayExist(ReadOptions(), "key", &value);
|
||||||
|
|
||||||
// Miss count should remain the same.
|
// Miss count should remain the same.
|
||||||
ASSERT_EQ(1,
|
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
|
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
||||||
ASSERT_EQ(1,
|
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT));
|
|
||||||
|
|
||||||
db_->KeyMayExist(ReadOptions(), "key", &value);
|
db_->KeyMayExist(ReadOptions(), "key", &value);
|
||||||
ASSERT_EQ(1,
|
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
|
ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
||||||
ASSERT_EQ(2,
|
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT));
|
|
||||||
|
|
||||||
// Make sure index block is in cache.
|
// Make sure index block is in cache.
|
||||||
auto index_block_hit =
|
auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT);
|
|
||||||
value = Get("key");
|
value = Get("key");
|
||||||
ASSERT_EQ(1,
|
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
|
|
||||||
ASSERT_EQ(index_block_hit + 1,
|
ASSERT_EQ(index_block_hit + 1,
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT));
|
TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
||||||
|
|
||||||
value = Get("key");
|
value = Get("key");
|
||||||
ASSERT_EQ(1,
|
ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_MISS));
|
|
||||||
ASSERT_EQ(index_block_hit + 2,
|
ASSERT_EQ(index_block_hit + 2,
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT));
|
TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DBTest, LevelLimitReopen) {
|
TEST(DBTest, LevelLimitReopen) {
|
||||||
@ -768,10 +762,9 @@ TEST(DBTest, LevelLimitReopen) {
|
|||||||
options.num_levels = 1;
|
options.num_levels = 1;
|
||||||
options.max_bytes_for_level_multiplier_additional.resize(1, 1);
|
options.max_bytes_for_level_multiplier_additional.resize(1, 1);
|
||||||
Status s = TryReopen(&options);
|
Status s = TryReopen(&options);
|
||||||
ASSERT_EQ(s.IsCorruption(), true);
|
ASSERT_EQ(s.IsInvalidArgument(), true);
|
||||||
ASSERT_EQ(s.ToString(),
|
ASSERT_EQ(s.ToString(),
|
||||||
"Corruption: VersionEdit: column family already has "
|
"Invalid argument: db has more levels than options.num_levels");
|
||||||
"more levels than specified");
|
|
||||||
|
|
||||||
options.num_levels = 10;
|
options.num_levels = 10;
|
||||||
options.max_bytes_for_level_multiplier_additional.resize(10, 1);
|
options.max_bytes_for_level_multiplier_additional.resize(10, 1);
|
||||||
@ -968,47 +961,39 @@ TEST(DBTest, KeyMayExist) {
|
|||||||
dbfull()->Flush(FlushOptions());
|
dbfull()->Flush(FlushOptions());
|
||||||
value.clear();
|
value.clear();
|
||||||
|
|
||||||
long numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
|
long numopen = TestGetTickerCount(options, NO_FILE_OPENS);
|
||||||
long cache_added =
|
long cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
|
|
||||||
ASSERT_TRUE(db_->KeyMayExist(ropts, "a", &value, &value_found));
|
ASSERT_TRUE(db_->KeyMayExist(ropts, "a", &value, &value_found));
|
||||||
ASSERT_TRUE(!value_found);
|
ASSERT_TRUE(!value_found);
|
||||||
// assert that no new files were opened and no new blocks were
|
// assert that no new files were opened and no new blocks were
|
||||||
// read into block cache.
|
// read into block cache.
|
||||||
ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
|
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
|
||||||
ASSERT_EQ(cache_added,
|
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
|
|
||||||
|
|
||||||
ASSERT_OK(db_->Delete(WriteOptions(), "a"));
|
ASSERT_OK(db_->Delete(WriteOptions(), "a"));
|
||||||
|
|
||||||
numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
|
numopen = TestGetTickerCount(options, NO_FILE_OPENS);
|
||||||
cache_added =
|
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
|
|
||||||
ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value));
|
ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value));
|
||||||
ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
|
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
|
||||||
ASSERT_EQ(cache_added,
|
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
|
|
||||||
|
|
||||||
dbfull()->Flush(FlushOptions());
|
dbfull()->Flush(FlushOptions());
|
||||||
dbfull()->CompactRange(nullptr, nullptr);
|
dbfull()->CompactRange(nullptr, nullptr);
|
||||||
|
|
||||||
numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
|
numopen = TestGetTickerCount(options, NO_FILE_OPENS);
|
||||||
cache_added =
|
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
|
|
||||||
ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value));
|
ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value));
|
||||||
ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
|
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
|
||||||
ASSERT_EQ(cache_added,
|
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
|
|
||||||
|
|
||||||
ASSERT_OK(db_->Delete(WriteOptions(), "c"));
|
ASSERT_OK(db_->Delete(WriteOptions(), "c"));
|
||||||
|
|
||||||
numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
|
numopen = TestGetTickerCount(options, NO_FILE_OPENS);
|
||||||
cache_added =
|
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
|
|
||||||
ASSERT_TRUE(!db_->KeyMayExist(ropts, "c", &value));
|
ASSERT_TRUE(!db_->KeyMayExist(ropts, "c", &value));
|
||||||
ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
|
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
|
||||||
ASSERT_EQ(cache_added,
|
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
|
|
||||||
|
|
||||||
delete options.filter_policy;
|
delete options.filter_policy;
|
||||||
} while (ChangeOptions());
|
} while (ChangeOptions());
|
||||||
@ -1041,9 +1026,8 @@ TEST(DBTest, NonBlockingIteration) {
|
|||||||
|
|
||||||
// verify that a non-blocking iterator does not find any
|
// verify that a non-blocking iterator does not find any
|
||||||
// kvs. Neither does it do any IOs to storage.
|
// kvs. Neither does it do any IOs to storage.
|
||||||
long numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
|
long numopen = TestGetTickerCount(options, NO_FILE_OPENS);
|
||||||
long cache_added =
|
long cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
|
|
||||||
iter = db_->NewIterator(non_blocking_opts);
|
iter = db_->NewIterator(non_blocking_opts);
|
||||||
count = 0;
|
count = 0;
|
||||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||||
@ -1051,18 +1035,16 @@ TEST(DBTest, NonBlockingIteration) {
|
|||||||
}
|
}
|
||||||
ASSERT_EQ(count, 0);
|
ASSERT_EQ(count, 0);
|
||||||
ASSERT_TRUE(iter->status().IsIncomplete());
|
ASSERT_TRUE(iter->status().IsIncomplete());
|
||||||
ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
|
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
|
||||||
ASSERT_EQ(cache_added,
|
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
|
|
||||||
delete iter;
|
delete iter;
|
||||||
|
|
||||||
// read in the specified block via a regular get
|
// read in the specified block via a regular get
|
||||||
ASSERT_EQ(Get("a"), "b");
|
ASSERT_EQ(Get("a"), "b");
|
||||||
|
|
||||||
// verify that we can find it via a non-blocking scan
|
// verify that we can find it via a non-blocking scan
|
||||||
numopen = options.statistics.get()->getTickerCount(NO_FILE_OPENS);
|
numopen = TestGetTickerCount(options, NO_FILE_OPENS);
|
||||||
cache_added =
|
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD);
|
|
||||||
iter = db_->NewIterator(non_blocking_opts);
|
iter = db_->NewIterator(non_blocking_opts);
|
||||||
count = 0;
|
count = 0;
|
||||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||||
@ -1070,9 +1052,8 @@ TEST(DBTest, NonBlockingIteration) {
|
|||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
ASSERT_EQ(count, 1);
|
ASSERT_EQ(count, 1);
|
||||||
ASSERT_EQ(numopen, options.statistics.get()->getTickerCount(NO_FILE_OPENS));
|
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
|
||||||
ASSERT_EQ(cache_added,
|
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
|
||||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_ADD));
|
|
||||||
delete iter;
|
delete iter;
|
||||||
|
|
||||||
} while (ChangeOptions());
|
} while (ChangeOptions());
|
||||||
@ -1277,12 +1258,10 @@ TEST(DBTest, IterReseek) {
|
|||||||
ASSERT_OK(Put("b", "bone"));
|
ASSERT_OK(Put("b", "bone"));
|
||||||
Iterator* iter = db_->NewIterator(ReadOptions());
|
Iterator* iter = db_->NewIterator(ReadOptions());
|
||||||
iter->SeekToFirst();
|
iter->SeekToFirst();
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(
|
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION), 0);
|
|
||||||
ASSERT_EQ(IterStatus(iter), "a->two");
|
ASSERT_EQ(IterStatus(iter), "a->two");
|
||||||
iter->Next();
|
iter->Next();
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(
|
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION), 0);
|
|
||||||
ASSERT_EQ(IterStatus(iter), "b->bone");
|
ASSERT_EQ(IterStatus(iter), "b->bone");
|
||||||
delete iter;
|
delete iter;
|
||||||
|
|
||||||
@ -1293,8 +1272,7 @@ TEST(DBTest, IterReseek) {
|
|||||||
iter->SeekToFirst();
|
iter->SeekToFirst();
|
||||||
ASSERT_EQ(IterStatus(iter), "a->three");
|
ASSERT_EQ(IterStatus(iter), "a->three");
|
||||||
iter->Next();
|
iter->Next();
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(
|
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION), 0);
|
|
||||||
ASSERT_EQ(IterStatus(iter), "b->bone");
|
ASSERT_EQ(IterStatus(iter), "b->bone");
|
||||||
delete iter;
|
delete iter;
|
||||||
|
|
||||||
@ -1304,30 +1282,28 @@ TEST(DBTest, IterReseek) {
|
|||||||
iter = db_->NewIterator(ReadOptions());
|
iter = db_->NewIterator(ReadOptions());
|
||||||
iter->SeekToFirst();
|
iter->SeekToFirst();
|
||||||
ASSERT_EQ(IterStatus(iter), "a->four");
|
ASSERT_EQ(IterStatus(iter), "a->four");
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(
|
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION), 0);
|
|
||||||
iter->Next();
|
iter->Next();
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(
|
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION), 1);
|
|
||||||
ASSERT_EQ(IterStatus(iter), "b->bone");
|
ASSERT_EQ(IterStatus(iter), "b->bone");
|
||||||
delete iter;
|
delete iter;
|
||||||
|
|
||||||
// Testing reverse iterator
|
// Testing reverse iterator
|
||||||
// At this point, we have three versions of "a" and one version of "b".
|
// At this point, we have three versions of "a" and one version of "b".
|
||||||
// The reseek statistics is already at 1.
|
// The reseek statistics is already at 1.
|
||||||
int num_reseeks = (int)options.statistics.get()->getTickerCount(
|
int num_reseeks =
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION);
|
(int)TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION);
|
||||||
|
|
||||||
// Insert another version of b and assert that reseek is not invoked
|
// Insert another version of b and assert that reseek is not invoked
|
||||||
ASSERT_OK(Put("b", "btwo"));
|
ASSERT_OK(Put("b", "btwo"));
|
||||||
iter = db_->NewIterator(ReadOptions());
|
iter = db_->NewIterator(ReadOptions());
|
||||||
iter->SeekToLast();
|
iter->SeekToLast();
|
||||||
ASSERT_EQ(IterStatus(iter), "b->btwo");
|
ASSERT_EQ(IterStatus(iter), "b->btwo");
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(
|
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION), num_reseeks);
|
num_reseeks);
|
||||||
iter->Prev();
|
iter->Prev();
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(
|
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION), num_reseeks+1);
|
num_reseeks + 1);
|
||||||
ASSERT_EQ(IterStatus(iter), "a->four");
|
ASSERT_EQ(IterStatus(iter), "a->four");
|
||||||
delete iter;
|
delete iter;
|
||||||
|
|
||||||
@ -1338,13 +1314,13 @@ TEST(DBTest, IterReseek) {
|
|||||||
iter = db_->NewIterator(ReadOptions());
|
iter = db_->NewIterator(ReadOptions());
|
||||||
iter->SeekToLast();
|
iter->SeekToLast();
|
||||||
ASSERT_EQ(IterStatus(iter), "b->bfour");
|
ASSERT_EQ(IterStatus(iter), "b->bfour");
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(
|
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION), num_reseeks + 2);
|
num_reseeks + 2);
|
||||||
iter->Prev();
|
iter->Prev();
|
||||||
|
|
||||||
// the previous Prev call should have invoked reseek
|
// the previous Prev call should have invoked reseek
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(
|
ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
|
||||||
NUMBER_OF_RESEEKS_IN_ITERATION), num_reseeks + 3);
|
num_reseeks + 3);
|
||||||
ASSERT_EQ(IterStatus(iter), "a->four");
|
ASSERT_EQ(IterStatus(iter), "a->four");
|
||||||
delete iter;
|
delete iter;
|
||||||
}
|
}
|
||||||
@ -2107,24 +2083,18 @@ TEST(DBTest, CompressedCache) {
|
|||||||
switch (iter) {
|
switch (iter) {
|
||||||
case 0:
|
case 0:
|
||||||
// only uncompressed block cache
|
// only uncompressed block cache
|
||||||
ASSERT_GT(options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS),
|
ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
|
||||||
0);
|
ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount
|
|
||||||
(BLOCK_CACHE_COMPRESSED_MISS), 0);
|
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
// no block cache, only compressed cache
|
// no block cache, only compressed cache
|
||||||
ASSERT_EQ(options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS),
|
ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
|
||||||
0);
|
ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
|
||||||
ASSERT_GT(options.statistics.get()->getTickerCount
|
|
||||||
(BLOCK_CACHE_COMPRESSED_MISS), 0);
|
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
// both compressed and uncompressed block cache
|
// both compressed and uncompressed block cache
|
||||||
ASSERT_GT(options.statistics.get()->getTickerCount(BLOCK_CACHE_MISS),
|
ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
|
||||||
0);
|
ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
|
||||||
ASSERT_GT(options.statistics.get()->getTickerCount
|
|
||||||
(BLOCK_CACHE_COMPRESSED_MISS), 0);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
ASSERT_TRUE(false);
|
ASSERT_TRUE(false);
|
||||||
@ -3313,34 +3283,46 @@ TEST(DBTest, ManualCompaction) {
|
|||||||
ASSERT_EQ(dbfull()->MaxMemCompactionLevel(), 2)
|
ASSERT_EQ(dbfull()->MaxMemCompactionLevel(), 2)
|
||||||
<< "Need to update this test to match kMaxMemCompactLevel";
|
<< "Need to update this test to match kMaxMemCompactLevel";
|
||||||
|
|
||||||
MakeTables(3, "p", "q");
|
// iter - 0 with 7 levels
|
||||||
ASSERT_EQ("1,1,1", FilesPerLevel());
|
// iter - 1 with 3 levels
|
||||||
|
for (int iter = 0; iter < 2; ++iter) {
|
||||||
|
MakeTables(3, "p", "q");
|
||||||
|
ASSERT_EQ("1,1,1", FilesPerLevel());
|
||||||
|
|
||||||
// Compaction range falls before files
|
// Compaction range falls before files
|
||||||
Compact("", "c");
|
Compact("", "c");
|
||||||
ASSERT_EQ("1,1,1", FilesPerLevel());
|
ASSERT_EQ("1,1,1", FilesPerLevel());
|
||||||
|
|
||||||
// Compaction range falls after files
|
// Compaction range falls after files
|
||||||
Compact("r", "z");
|
Compact("r", "z");
|
||||||
ASSERT_EQ("1,1,1", FilesPerLevel());
|
ASSERT_EQ("1,1,1", FilesPerLevel());
|
||||||
|
|
||||||
// Compaction range overlaps files
|
// Compaction range overlaps files
|
||||||
Compact("p1", "p9");
|
Compact("p1", "p9");
|
||||||
ASSERT_EQ("0,0,1", FilesPerLevel());
|
ASSERT_EQ("0,0,1", FilesPerLevel());
|
||||||
|
|
||||||
// Populate a different range
|
// Populate a different range
|
||||||
MakeTables(3, "c", "e");
|
MakeTables(3, "c", "e");
|
||||||
ASSERT_EQ("1,1,2", FilesPerLevel());
|
ASSERT_EQ("1,1,2", FilesPerLevel());
|
||||||
|
|
||||||
// Compact just the new range
|
// Compact just the new range
|
||||||
Compact("b", "f");
|
Compact("b", "f");
|
||||||
ASSERT_EQ("0,0,2", FilesPerLevel());
|
ASSERT_EQ("0,0,2", FilesPerLevel());
|
||||||
|
|
||||||
|
// Compact all
|
||||||
|
MakeTables(1, "a", "z");
|
||||||
|
ASSERT_EQ("0,1,2", FilesPerLevel());
|
||||||
|
db_->CompactRange(nullptr, nullptr);
|
||||||
|
ASSERT_EQ("0,0,1", FilesPerLevel());
|
||||||
|
|
||||||
|
if (iter == 0) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.num_levels = 3;
|
||||||
|
options.create_if_missing = true;
|
||||||
|
DestroyAndReopen(&options);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Compact all
|
|
||||||
MakeTables(1, "a", "z");
|
|
||||||
ASSERT_EQ("0,1,2", FilesPerLevel());
|
|
||||||
db_->CompactRange(nullptr, nullptr);
|
|
||||||
ASSERT_EQ("0,0,1", FilesPerLevel());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DBTest, DBOpen_Options) {
|
TEST(DBTest, DBOpen_Options) {
|
||||||
@ -3401,7 +3383,7 @@ TEST(DBTest, DBOpen_Change_NumLevels) {
|
|||||||
opts.create_if_missing = false;
|
opts.create_if_missing = false;
|
||||||
opts.num_levels = 2;
|
opts.num_levels = 2;
|
||||||
s = DB::Open(opts, dbname, &db);
|
s = DB::Open(opts, dbname, &db);
|
||||||
ASSERT_TRUE(strstr(s.ToString().c_str(), "Corruption") != nullptr);
|
ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr);
|
||||||
ASSERT_TRUE(db == nullptr);
|
ASSERT_TRUE(db == nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4336,6 +4318,70 @@ TEST(DBTest, MultiThreaded) {
|
|||||||
} while (ChangeOptions());
|
} while (ChangeOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Group commit test:
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
static const int kGCNumThreads = 4;
|
||||||
|
static const int kGCNumKeys = 1000;
|
||||||
|
|
||||||
|
struct GCThread {
|
||||||
|
DB* db;
|
||||||
|
int id;
|
||||||
|
std::atomic<bool> done;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void GCThreadBody(void* arg) {
|
||||||
|
GCThread* t = reinterpret_cast<GCThread*>(arg);
|
||||||
|
int id = t->id;
|
||||||
|
DB* db = t->db;
|
||||||
|
WriteOptions wo;
|
||||||
|
|
||||||
|
for (int i = 0; i < kGCNumKeys; ++i) {
|
||||||
|
std::string kv(std::to_string(i + id * kGCNumKeys));
|
||||||
|
ASSERT_OK(db->Put(wo, kv, kv));
|
||||||
|
}
|
||||||
|
t->done = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Starts kGCNumThreads writer threads that insert disjoint key ranges
// concurrently, waits for all of them to finish, and then checks through an
// iterator that the database contains exactly the expected key/value pairs.
// The whole sequence is repeated for every configuration that
// ChangeOptions() produces.
TEST(DBTest, GroupCommitTest) {
  do {
    // Start threads
    GCThread thread[kGCNumThreads];
    for (int id = 0; id < kGCNumThreads; id++) {
      thread[id].id = id;
      thread[id].db = db_;
      thread[id].done = false;
      env_->StartThread(GCThreadBody, &thread[id]);
    }

    // Poll each writer's done flag, sleeping between checks.
    for (int id = 0; id < kGCNumThreads; id++) {
      while (!thread[id].done) {
        env_->SleepForMicroseconds(100000);
      }
    }

    // Build the expected contents. Keys and values are the decimal strings
    // of 0 .. kGCNumThreads * kGCNumKeys - 1; the list is sorted as strings
    // before being compared against the order the iterator yields.
    std::vector<std::string> expected_db;
    expected_db.reserve(kGCNumThreads * kGCNumKeys);  // final size is known
    for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
      expected_db.push_back(std::to_string(i));
    }
    std::sort(expected_db.begin(), expected_db.end());

    Iterator* itr = db_->NewIterator(ReadOptions());
    itr->SeekToFirst();
    // Bind by const reference: iterating by value copied every string.
    for (const auto& x : expected_db) {
      ASSERT_TRUE(itr->Valid());
      ASSERT_EQ(itr->key().ToString(), x);
      ASSERT_EQ(itr->value().ToString(), x);
      itr->Next();
    }
    // No entries beyond the expected ones may remain.
    ASSERT_TRUE(!itr->Valid());
    delete itr;

  } while (ChangeOptions());
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
typedef std::map<std::string, std::string> KVMap;
|
typedef std::map<std::string, std::string> KVMap;
|
||||||
}
|
}
|
||||||
@ -4903,7 +4949,7 @@ void BM_LogAndApply(int iters, int num_base_files) {
|
|||||||
EnvOptions sopt;
|
EnvOptions sopt;
|
||||||
VersionSet vset(dbname, &options, sopt, nullptr, &cmp);
|
VersionSet vset(dbname, &options, sopt, nullptr, &cmp);
|
||||||
ASSERT_OK(vset.Recover());
|
ASSERT_OK(vset.Recover());
|
||||||
VersionEdit vbase(vset.NumberLevels());
|
VersionEdit vbase;
|
||||||
uint64_t fnum = 1;
|
uint64_t fnum = 1;
|
||||||
for (int i = 0; i < num_base_files; i++) {
|
for (int i = 0; i < num_base_files; i++) {
|
||||||
InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
|
InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
|
||||||
@ -4915,7 +4961,7 @@ void BM_LogAndApply(int iters, int num_base_files) {
|
|||||||
uint64_t start_micros = env->NowMicros();
|
uint64_t start_micros = env->NowMicros();
|
||||||
|
|
||||||
for (int i = 0; i < iters; i++) {
|
for (int i = 0; i < iters; i++) {
|
||||||
VersionEdit vedit(vset.NumberLevels());
|
VersionEdit vedit;
|
||||||
vedit.DeleteFile(2, fnum);
|
vedit.DeleteFile(2, fnum);
|
||||||
InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
|
InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
|
||||||
InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
|
InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
|
||||||
|
@ -20,7 +20,7 @@
|
|||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/mutexlock.h"
|
#include "util/mutexlock.h"
|
||||||
#include "util/murmurhash.h"
|
#include "util/murmurhash.h"
|
||||||
#include "util/statistics_imp.h"
|
#include "util/statistics.h"
|
||||||
|
|
||||||
namespace std {
|
namespace std {
|
||||||
template <>
|
template <>
|
||||||
@ -33,24 +33,20 @@ struct hash<rocksdb::Slice> {
|
|||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
MemTable::MemTable(const InternalKeyComparator& cmp,
|
MemTable::MemTable(const InternalKeyComparator& cmp, const Options& options)
|
||||||
MemTableRepFactory* table_factory,
|
|
||||||
int numlevel,
|
|
||||||
const Options& options)
|
|
||||||
: comparator_(cmp),
|
: comparator_(cmp),
|
||||||
refs_(0),
|
refs_(0),
|
||||||
arena_impl_(options.arena_block_size),
|
arena_impl_(options.arena_block_size),
|
||||||
table_(table_factory->CreateMemTableRep(comparator_, &arena_impl_)),
|
table_(options.memtable_factory->CreateMemTableRep(comparator_,
|
||||||
|
&arena_impl_)),
|
||||||
flush_in_progress_(false),
|
flush_in_progress_(false),
|
||||||
flush_completed_(false),
|
flush_completed_(false),
|
||||||
file_number_(0),
|
file_number_(0),
|
||||||
edit_(numlevel),
|
|
||||||
first_seqno_(0),
|
first_seqno_(0),
|
||||||
mem_next_logfile_number_(0),
|
mem_next_logfile_number_(0),
|
||||||
mem_logfile_number_(0),
|
mem_logfile_number_(0),
|
||||||
locks_(options.inplace_update_support
|
locks_(options.inplace_update_support ? options.inplace_update_num_locks
|
||||||
? options.inplace_update_num_locks
|
: 0) {}
|
||||||
: 0) { }
|
|
||||||
|
|
||||||
MemTable::~MemTable() {
|
MemTable::~MemTable() {
|
||||||
assert(refs_ == 0);
|
assert(refs_ == 0);
|
||||||
@ -58,7 +54,7 @@ MemTable::~MemTable() {
|
|||||||
|
|
||||||
size_t MemTable::ApproximateMemoryUsage() {
|
size_t MemTable::ApproximateMemoryUsage() {
|
||||||
return arena_impl_.ApproximateMemoryUsage() +
|
return arena_impl_.ApproximateMemoryUsage() +
|
||||||
table_->ApproximateMemoryUsage();
|
table_->ApproximateMemoryUsage();
|
||||||
}
|
}
|
||||||
|
|
||||||
int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
|
int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
|
||||||
@ -89,11 +85,11 @@ class MemTableIterator: public Iterator {
|
|||||||
MemTableIterator(MemTableRep* table, const ReadOptions& options)
|
MemTableIterator(MemTableRep* table, const ReadOptions& options)
|
||||||
: iter_() {
|
: iter_() {
|
||||||
if (options.prefix) {
|
if (options.prefix) {
|
||||||
iter_ = table->GetPrefixIterator(*options.prefix);
|
iter_.reset(table->GetPrefixIterator(*options.prefix));
|
||||||
} else if (options.prefix_seek) {
|
} else if (options.prefix_seek) {
|
||||||
iter_ = table->GetDynamicPrefixIterator();
|
iter_.reset(table->GetDynamicPrefixIterator());
|
||||||
} else {
|
} else {
|
||||||
iter_ = table->GetIterator();
|
iter_.reset(table->GetIterator());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,7 +110,7 @@ class MemTableIterator: public Iterator {
|
|||||||
virtual Status status() const { return Status::OK(); }
|
virtual Status status() const { return Status::OK(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<MemTableRep::Iterator> iter_;
|
std::unique_ptr<MemTableRep::Iterator> iter_;
|
||||||
std::string tmp_; // For passing to EncodeKey
|
std::string tmp_; // For passing to EncodeKey
|
||||||
|
|
||||||
// No copying allowed
|
// No copying allowed
|
||||||
@ -165,8 +161,8 @@ void MemTable::Add(SequenceNumber s, ValueType type,
|
|||||||
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
||||||
MergeContext& merge_context, const Options& options) {
|
MergeContext& merge_context, const Options& options) {
|
||||||
Slice memkey = key.memtable_key();
|
Slice memkey = key.memtable_key();
|
||||||
std::shared_ptr<MemTableRep::Iterator> iter(
|
std::unique_ptr<MemTableRep::Iterator> iter(
|
||||||
table_->GetIterator(key.user_key()));
|
table_->GetIterator(key.user_key()));
|
||||||
iter->Seek(memkey.data());
|
iter->Seek(memkey.data());
|
||||||
|
|
||||||
bool merge_in_progress = s->IsMergeInProgress();
|
bool merge_in_progress = s->IsMergeInProgress();
|
||||||
@ -274,8 +270,8 @@ bool MemTable::Update(SequenceNumber seq, ValueType type,
|
|||||||
LookupKey lkey(key, seq);
|
LookupKey lkey(key, seq);
|
||||||
Slice memkey = lkey.memtable_key();
|
Slice memkey = lkey.memtable_key();
|
||||||
|
|
||||||
std::shared_ptr<MemTableRep::Iterator> iter(
|
std::unique_ptr<MemTableRep::Iterator> iter(
|
||||||
table_->GetIterator(lkey.user_key()));
|
table_->GetIterator(lkey.user_key()));
|
||||||
iter->Seek(memkey.data());
|
iter->Seek(memkey.data());
|
||||||
|
|
||||||
if (iter->Valid()) {
|
if (iter->Valid()) {
|
||||||
@ -336,8 +332,8 @@ size_t MemTable::CountSuccessiveMergeEntries(const LookupKey& key) {
|
|||||||
// A total ordered iterator is costly for some memtablerep (prefix aware
|
// A total ordered iterator is costly for some memtablerep (prefix aware
|
||||||
// reps). By passing in the user key, we allow efficient iterator creation.
|
// reps). By passing in the user key, we allow efficient iterator creation.
|
||||||
// The iterator only needs to be ordered within the same user key.
|
// The iterator only needs to be ordered within the same user key.
|
||||||
std::shared_ptr<MemTableRep::Iterator> iter(
|
std::unique_ptr<MemTableRep::Iterator> iter(
|
||||||
table_->GetIterator(key.user_key()));
|
table_->GetIterator(key.user_key()));
|
||||||
iter->Seek(memkey.data());
|
iter->Seek(memkey.data());
|
||||||
|
|
||||||
size_t num_successive_merges = 0;
|
size_t num_successive_merges = 0;
|
||||||
|
@ -34,11 +34,8 @@ class MemTable {
|
|||||||
|
|
||||||
// MemTables are reference counted. The initial reference count
|
// MemTables are reference counted. The initial reference count
|
||||||
// is zero and the caller must call Ref() at least once.
|
// is zero and the caller must call Ref() at least once.
|
||||||
explicit MemTable(
|
explicit MemTable(const InternalKeyComparator& comparator,
|
||||||
const InternalKeyComparator& comparator,
|
const Options& options = Options());
|
||||||
MemTableRepFactory* table_factory,
|
|
||||||
int numlevel = 7,
|
|
||||||
const Options& options = Options());
|
|
||||||
|
|
||||||
~MemTable();
|
~MemTable();
|
||||||
|
|
||||||
@ -146,7 +143,7 @@ class MemTable {
|
|||||||
KeyComparator comparator_;
|
KeyComparator comparator_;
|
||||||
int refs_;
|
int refs_;
|
||||||
ArenaImpl arena_impl_;
|
ArenaImpl arena_impl_;
|
||||||
shared_ptr<MemTableRep> table_;
|
unique_ptr<MemTableRep> table_;
|
||||||
|
|
||||||
// These are used to manage memtable flushes to storage
|
// These are used to manage memtable flushes to storage
|
||||||
bool flush_in_progress_; // started the flush
|
bool flush_in_progress_; // started the flush
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include "rocksdb/comparator.h"
|
#include "rocksdb/comparator.h"
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
#include "rocksdb/merge_operator.h"
|
#include "rocksdb/merge_operator.h"
|
||||||
#include "util/statistics_imp.h"
|
#include "util/statistics.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
@ -58,7 +58,7 @@ class Repairer {
|
|||||||
next_file_number_(1) {
|
next_file_number_(1) {
|
||||||
// TableCache can be small since we expect each table to be opened once.
|
// TableCache can be small since we expect each table to be opened once.
|
||||||
table_cache_ = new TableCache(dbname_, &options_, storage_options_, 10);
|
table_cache_ = new TableCache(dbname_, &options_, storage_options_, 10);
|
||||||
edit_ = new VersionEdit(options.num_levels);
|
edit_ = new VersionEdit();
|
||||||
}
|
}
|
||||||
|
|
||||||
~Repairer() {
|
~Repairer() {
|
||||||
@ -196,8 +196,7 @@ class Repairer {
|
|||||||
std::string scratch;
|
std::string scratch;
|
||||||
Slice record;
|
Slice record;
|
||||||
WriteBatch batch;
|
WriteBatch batch;
|
||||||
MemTable* mem = new MemTable(icmp_, options_.memtable_factory.get(),
|
MemTable* mem = new MemTable(icmp_, options_);
|
||||||
options_.num_levels);
|
|
||||||
mem->Ref();
|
mem->Ref();
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
while (reader.ReadRecord(&record, &scratch)) {
|
while (reader.ReadRecord(&record, &scratch)) {
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
#include "db/filename.h"
|
#include "db/filename.h"
|
||||||
#include "db/version_set.h"
|
#include "db/version_set.h"
|
||||||
#include "db/write_batch_internal.h"
|
#include "db/write_batch_internal.h"
|
||||||
#include "db/db_statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
#include "rocksdb/cache.h"
|
#include "rocksdb/cache.h"
|
||||||
#include "rocksdb/compaction_filter.h"
|
#include "rocksdb/compaction_filter.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
|
@ -38,6 +38,7 @@ enum Tag {
|
|||||||
|
|
||||||
void VersionEdit::Clear() {
|
void VersionEdit::Clear() {
|
||||||
comparator_.clear();
|
comparator_.clear();
|
||||||
|
max_level_ = 0;
|
||||||
log_number_ = 0;
|
log_number_ = 0;
|
||||||
prev_log_number_ = 0;
|
prev_log_number_ = 0;
|
||||||
last_sequence_ = 0;
|
last_sequence_ = 0;
|
||||||
@ -77,12 +78,6 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
|||||||
PutVarint64(dst, last_sequence_);
|
PutVarint64(dst, last_sequence_);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
|
||||||
PutVarint32(dst, kCompactPointer);
|
|
||||||
PutVarint32(dst, compact_pointers_[i].first); // level
|
|
||||||
PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
|
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
|
||||||
iter != deleted_files_.end();
|
iter != deleted_files_.end();
|
||||||
++iter) {
|
++iter) {
|
||||||
@ -131,14 +126,13 @@ static bool GetInternalKey(Slice* input, InternalKey* dst) {
|
|||||||
|
|
||||||
bool VersionEdit::GetLevel(Slice* input, int* level, const char** msg) {
|
bool VersionEdit::GetLevel(Slice* input, int* level, const char** msg) {
|
||||||
uint32_t v;
|
uint32_t v;
|
||||||
if (GetVarint32(input, &v) &&
|
if (GetVarint32(input, &v)) {
|
||||||
(int)v < number_levels_) {
|
|
||||||
*level = v;
|
*level = v;
|
||||||
|
if (max_level_ < *level) {
|
||||||
|
max_level_ = *level;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
if ((int)v >= number_levels_) {
|
|
||||||
*msg = "column family already has more levels than specified";
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -202,7 +196,9 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
|||||||
case kCompactPointer:
|
case kCompactPointer:
|
||||||
if (GetLevel(&input, &level, &msg) &&
|
if (GetLevel(&input, &level, &msg) &&
|
||||||
GetInternalKey(&input, &key)) {
|
GetInternalKey(&input, &key)) {
|
||||||
compact_pointers_.push_back(std::make_pair(level, key));
|
// we don't use compact pointers anymore,
|
||||||
|
// but we should not fail if they are still
|
||||||
|
// in manifest
|
||||||
} else {
|
} else {
|
||||||
if (!msg) {
|
if (!msg) {
|
||||||
msg = "compaction pointer";
|
msg = "compaction pointer";
|
||||||
@ -314,12 +310,6 @@ std::string VersionEdit::DebugString(bool hex_key) const {
|
|||||||
r.append("\n LastSeq: ");
|
r.append("\n LastSeq: ");
|
||||||
AppendNumberTo(&r, last_sequence_);
|
AppendNumberTo(&r, last_sequence_);
|
||||||
}
|
}
|
||||||
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
|
||||||
r.append("\n CompactPointer: ");
|
|
||||||
AppendNumberTo(&r, compact_pointers_[i].first);
|
|
||||||
r.append(" ");
|
|
||||||
r.append(compact_pointers_[i].second.DebugString(hex_key));
|
|
||||||
}
|
|
||||||
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
|
for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
|
||||||
iter != deleted_files_.end();
|
iter != deleted_files_.end();
|
||||||
++iter) {
|
++iter) {
|
||||||
|
@ -34,9 +34,7 @@ struct FileMetaData {
|
|||||||
|
|
||||||
class VersionEdit {
|
class VersionEdit {
|
||||||
public:
|
public:
|
||||||
explicit VersionEdit(int number_levels) : number_levels_(number_levels) {
|
VersionEdit() { Clear(); }
|
||||||
Clear();
|
|
||||||
}
|
|
||||||
~VersionEdit() { }
|
~VersionEdit() { }
|
||||||
|
|
||||||
void Clear();
|
void Clear();
|
||||||
@ -61,9 +59,6 @@ class VersionEdit {
|
|||||||
has_last_sequence_ = true;
|
has_last_sequence_ = true;
|
||||||
last_sequence_ = seq;
|
last_sequence_ = seq;
|
||||||
}
|
}
|
||||||
void SetCompactPointer(int level, const InternalKey& key) {
|
|
||||||
compact_pointers_.push_back(std::make_pair(level, key));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add the specified file at the specified number.
|
// Add the specified file at the specified number.
|
||||||
// REQUIRES: This version has not been saved (see VersionSet::SaveTo)
|
// REQUIRES: This version has not been saved (see VersionSet::SaveTo)
|
||||||
@ -128,7 +123,7 @@ class VersionEdit {
|
|||||||
|
|
||||||
bool GetLevel(Slice* input, int* level, const char** msg);
|
bool GetLevel(Slice* input, int* level, const char** msg);
|
||||||
|
|
||||||
int number_levels_;
|
int max_level_;
|
||||||
std::string comparator_;
|
std::string comparator_;
|
||||||
uint64_t log_number_;
|
uint64_t log_number_;
|
||||||
uint64_t prev_log_number_;
|
uint64_t prev_log_number_;
|
||||||
@ -140,7 +135,6 @@ class VersionEdit {
|
|||||||
bool has_next_file_number_;
|
bool has_next_file_number_;
|
||||||
bool has_last_sequence_;
|
bool has_last_sequence_;
|
||||||
|
|
||||||
std::vector< std::pair<int, InternalKey> > compact_pointers_;
|
|
||||||
DeletedFileSet deleted_files_;
|
DeletedFileSet deleted_files_;
|
||||||
std::vector< std::pair<int, FileMetaData> > new_files_;
|
std::vector< std::pair<int, FileMetaData> > new_files_;
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ namespace rocksdb {
|
|||||||
static void TestEncodeDecode(const VersionEdit& edit) {
|
static void TestEncodeDecode(const VersionEdit& edit) {
|
||||||
std::string encoded, encoded2;
|
std::string encoded, encoded2;
|
||||||
edit.EncodeTo(&encoded);
|
edit.EncodeTo(&encoded);
|
||||||
VersionEdit parsed(7);
|
VersionEdit parsed;
|
||||||
Status s = parsed.DecodeFrom(encoded);
|
Status s = parsed.DecodeFrom(encoded);
|
||||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||||
parsed.EncodeTo(&encoded2);
|
parsed.EncodeTo(&encoded2);
|
||||||
@ -27,7 +27,7 @@ class VersionEditTest { };
|
|||||||
TEST(VersionEditTest, EncodeDecode) {
|
TEST(VersionEditTest, EncodeDecode) {
|
||||||
static const uint64_t kBig = 1ull << 50;
|
static const uint64_t kBig = 1ull << 50;
|
||||||
|
|
||||||
VersionEdit edit(7);
|
VersionEdit edit;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
TestEncodeDecode(edit);
|
TestEncodeDecode(edit);
|
||||||
edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
|
edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
|
||||||
@ -36,7 +36,6 @@ TEST(VersionEditTest, EncodeDecode) {
|
|||||||
kBig + 500 + i,
|
kBig + 500 + i,
|
||||||
kBig + 600 + i);
|
kBig + 600 + i);
|
||||||
edit.DeleteFile(4, kBig + 700 + i);
|
edit.DeleteFile(4, kBig + 700 + i);
|
||||||
edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
edit.SetComparatorName("foo");
|
edit.SetComparatorName("foo");
|
||||||
@ -47,7 +46,7 @@ TEST(VersionEditTest, EncodeDecode) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(VersionEditTest, ColumnFamilyTest) {
|
TEST(VersionEditTest, ColumnFamilyTest) {
|
||||||
VersionEdit edit(7);
|
VersionEdit edit;
|
||||||
edit.SetColumnFamily(2);
|
edit.SetColumnFamily(2);
|
||||||
edit.AddColumnFamily("column_family");
|
edit.AddColumnFamily("column_family");
|
||||||
TestEncodeDecode(edit);
|
TestEncodeDecode(edit);
|
||||||
|
1700
db/version_set.cc
1700
db/version_set.cc
File diff suppressed because it is too large
Load Diff
318
db/version_set.h
318
db/version_set.h
@ -27,12 +27,15 @@
|
|||||||
#include "db/version_edit.h"
|
#include "db/version_edit.h"
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
#include "db/table_cache.h"
|
#include "db/table_cache.h"
|
||||||
|
#include "db/compaction.h"
|
||||||
|
#include "db/compaction_picker.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
namespace log { class Writer; }
|
namespace log { class Writer; }
|
||||||
|
|
||||||
class Compaction;
|
class Compaction;
|
||||||
|
class CompactionPicker;
|
||||||
class Iterator;
|
class Iterator;
|
||||||
class MemTable;
|
class MemTable;
|
||||||
class TableCache;
|
class TableCache;
|
||||||
@ -86,6 +89,11 @@ class Version {
|
|||||||
// REQUIRES: lock is held
|
// REQUIRES: lock is held
|
||||||
bool UpdateStats(const GetStats& stats);
|
bool UpdateStats(const GetStats& stats);
|
||||||
|
|
||||||
|
// Updates internal structures that keep track of compaction scores
|
||||||
|
// We use compaction scores to figure out which compaction to do next
|
||||||
|
// Also pre-sorts level0 files for Get()
|
||||||
|
void Finalize(std::vector<uint64_t>& size_being_compacted);
|
||||||
|
|
||||||
// Reference count management (so Versions do not disappear out from
|
// Reference count management (so Versions do not disappear out from
|
||||||
// under live iterators)
|
// under live iterators)
|
||||||
void Ref();
|
void Ref();
|
||||||
@ -135,21 +143,54 @@ class Version {
|
|||||||
int PickLevelForMemTableOutput(const Slice& smallest_user_key,
|
int PickLevelForMemTableOutput(const Slice& smallest_user_key,
|
||||||
const Slice& largest_user_key);
|
const Slice& largest_user_key);
|
||||||
|
|
||||||
int NumFiles(int level) const { return files_[level].size(); }
|
int NumberLevels() const { return num_levels_; }
|
||||||
|
|
||||||
|
// REQUIRES: lock is held
|
||||||
|
int NumLevelFiles(int level) const { return files_[level].size(); }
|
||||||
|
|
||||||
|
// Return the combined file size of all files at the specified level.
|
||||||
|
int64_t NumLevelBytes(int level) const;
|
||||||
|
|
||||||
|
// Return a human-readable short (single-line) summary of the number
|
||||||
|
// of files per level. Uses *scratch as backing store.
|
||||||
|
struct LevelSummaryStorage {
|
||||||
|
char buffer[100];
|
||||||
|
};
|
||||||
|
struct FileSummaryStorage {
|
||||||
|
char buffer[1000];
|
||||||
|
};
|
||||||
|
const char* LevelSummary(LevelSummaryStorage* scratch) const;
|
||||||
|
// Return a human-readable short (single-line) summary of files
|
||||||
|
// in a specified level. Uses *scratch as backing store.
|
||||||
|
const char* LevelFileSummary(FileSummaryStorage* scratch, int level) const;
|
||||||
|
|
||||||
|
// Return the maximum overlapping data (in bytes) at next level for any
|
||||||
|
// file at a level >= 1.
|
||||||
|
int64_t MaxNextLevelOverlappingBytes();
|
||||||
|
|
||||||
|
// Add all files listed in the current version to *live.
|
||||||
|
void AddLiveFiles(std::set<uint64_t>* live);
|
||||||
|
|
||||||
// Return a human readable string that describes this version's contents.
|
// Return a human readable string that describes this version's contents.
|
||||||
std::string DebugString(bool hex = false) const;
|
std::string DebugString(bool hex = false) const;
|
||||||
|
|
||||||
// Returns the version nuber of this version
|
// Returns the version nuber of this version
|
||||||
uint64_t GetVersionNumber() {
|
uint64_t GetVersionNumber() const { return version_number_; }
|
||||||
return version_number_;
|
|
||||||
}
|
// used to sort files by size
|
||||||
|
struct Fsize {
|
||||||
|
int index;
|
||||||
|
FileMetaData* file;
|
||||||
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class Compaction;
|
friend class Compaction;
|
||||||
friend class VersionSet;
|
friend class VersionSet;
|
||||||
friend class DBImpl;
|
friend class DBImpl;
|
||||||
friend struct ColumnFamilyData;
|
friend struct ColumnFamilyData;
|
||||||
|
friend class CompactionPicker;
|
||||||
|
friend class LevelCompactionPicker;
|
||||||
|
friend class UniversalCompactionPicker;
|
||||||
|
|
||||||
class LevelFileNumIterator;
|
class LevelFileNumIterator;
|
||||||
Iterator* NewConcatenatingIterator(const ReadOptions&,
|
Iterator* NewConcatenatingIterator(const ReadOptions&,
|
||||||
@ -158,10 +199,15 @@ class Version {
|
|||||||
bool PrefixMayMatch(const ReadOptions& options, const EnvOptions& soptions,
|
bool PrefixMayMatch(const ReadOptions& options, const EnvOptions& soptions,
|
||||||
const Slice& internal_prefix, Iterator* level_iter) const;
|
const Slice& internal_prefix, Iterator* level_iter) const;
|
||||||
|
|
||||||
|
// Sort all files for this version based on their file size and
|
||||||
|
// record results in files_by_size_. The largest files are listed first.
|
||||||
|
void UpdateFilesBySize();
|
||||||
|
|
||||||
VersionSet* vset_; // VersionSet to which this Version belongs
|
VersionSet* vset_; // VersionSet to which this Version belongs
|
||||||
Version* next_; // Next version in linked list
|
Version* next_; // Next version in linked list
|
||||||
Version* prev_; // Previous version in linked list
|
Version* prev_; // Previous version in linked list
|
||||||
int refs_; // Number of live refs to this version
|
int refs_; // Number of live refs to this version
|
||||||
|
int num_levels_; // Number of levels
|
||||||
|
|
||||||
// List of files per level, files in each level are arranged
|
// List of files per level, files in each level are arranged
|
||||||
// in increasing order of keys
|
// in increasing order of keys
|
||||||
@ -251,10 +297,8 @@ struct ColumnFamilyData {
|
|||||||
|
|
||||||
class VersionSet {
|
class VersionSet {
|
||||||
public:
|
public:
|
||||||
VersionSet(const std::string& dbname,
|
VersionSet(const std::string& dbname, const Options* options,
|
||||||
const Options* options,
|
const EnvOptions& storage_options, TableCache* table_cache,
|
||||||
const EnvOptions& storage_options,
|
|
||||||
TableCache* table_cache,
|
|
||||||
const InternalKeyComparator*);
|
const InternalKeyComparator*);
|
||||||
~VersionSet();
|
~VersionSet();
|
||||||
|
|
||||||
@ -292,6 +336,12 @@ class VersionSet {
|
|||||||
return column_family_data_.find(0)->second->current;
|
return column_family_data_.find(0)->second->current;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A Flag indicating whether write needs to slowdown because of there are
|
||||||
|
// too many number of level0 files.
|
||||||
|
bool NeedSlowdownForNumLevel0Files() const {
|
||||||
|
return need_slowdown_for_num_level0_files_;
|
||||||
|
}
|
||||||
|
|
||||||
// Return the current manifest file number
|
// Return the current manifest file number
|
||||||
uint64_t ManifestFileNumber() const { return manifest_file_number_; }
|
uint64_t ManifestFileNumber() const { return manifest_file_number_; }
|
||||||
|
|
||||||
@ -307,12 +357,6 @@ class VersionSet {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the number of Table files at the specified level.
|
|
||||||
int NumLevelFiles(int level) const;
|
|
||||||
|
|
||||||
// Return the combined file size of all files at the specified level.
|
|
||||||
int64_t NumLevelBytes(int level) const;
|
|
||||||
|
|
||||||
// Return the last sequence number.
|
// Return the last sequence number.
|
||||||
uint64_t LastSequence() const {
|
uint64_t LastSequence() const {
|
||||||
return last_sequence_.load(std::memory_order_acquire);
|
return last_sequence_.load(std::memory_order_acquire);
|
||||||
@ -346,14 +390,18 @@ class VersionSet {
|
|||||||
// the specified level. Returns nullptr if there is nothing in that
|
// the specified level. Returns nullptr if there is nothing in that
|
||||||
// level that overlaps the specified range. Caller should delete
|
// level that overlaps the specified range. Caller should delete
|
||||||
// the result.
|
// the result.
|
||||||
Compaction* CompactRange(
|
//
|
||||||
int level,
|
// The returned Compaction might not include the whole requested range.
|
||||||
const InternalKey* begin,
|
// In that case, compaction_end will be set to the next key that needs
|
||||||
const InternalKey* end);
|
// compacting. In case the compaction will compact the whole range,
|
||||||
|
// compaction_end will be set to nullptr.
|
||||||
// Return the maximum overlapping data (in bytes) at next level for any
|
// Client is responsible for compaction_end storage -- when called,
|
||||||
// file at a level >= 1.
|
// *compaction_end should point to valid InternalKey!
|
||||||
int64_t MaxNextLevelOverlappingBytes();
|
Compaction* CompactRange(int input_level,
|
||||||
|
int output_level,
|
||||||
|
const InternalKey* begin,
|
||||||
|
const InternalKey* end,
|
||||||
|
InternalKey** compaction_end);
|
||||||
|
|
||||||
// Create an iterator that reads over the compaction inputs for "*c".
|
// Create an iterator that reads over the compaction inputs for "*c".
|
||||||
// The caller should delete the iterator when no longer needed.
|
// The caller should delete the iterator when no longer needed.
|
||||||
@ -405,58 +453,16 @@ class VersionSet {
|
|||||||
// Add all files listed in any live version to *live.
|
// Add all files listed in any live version to *live.
|
||||||
void AddLiveFiles(std::vector<uint64_t>* live_list);
|
void AddLiveFiles(std::vector<uint64_t>* live_list);
|
||||||
|
|
||||||
// Add all files listed in the current version to *live.
|
|
||||||
void AddLiveFilesCurrentVersion(std::set<uint64_t>* live);
|
|
||||||
|
|
||||||
// Return the approximate offset in the database of the data for
|
// Return the approximate offset in the database of the data for
|
||||||
// "key" as of version "v".
|
// "key" as of version "v".
|
||||||
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
|
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
|
||||||
|
|
||||||
// Return a human-readable short (single-line) summary of the number
|
|
||||||
// of files per level. Uses *scratch as backing store.
|
|
||||||
struct LevelSummaryStorage {
|
|
||||||
char buffer[100];
|
|
||||||
};
|
|
||||||
struct FileSummaryStorage {
|
|
||||||
char buffer[1000];
|
|
||||||
};
|
|
||||||
const char* LevelSummary(LevelSummaryStorage* scratch) const;
|
|
||||||
|
|
||||||
// printf contents (for debugging)
|
// printf contents (for debugging)
|
||||||
Status DumpManifest(Options& options, std::string& manifestFileName,
|
Status DumpManifest(Options& options, std::string& manifestFileName,
|
||||||
bool verbose, bool hex = false);
|
bool verbose, bool hex = false);
|
||||||
|
|
||||||
// Return a human-readable short (single-line) summary of the data size
|
|
||||||
// of files per level. Uses *scratch as backing store.
|
|
||||||
const char* LevelDataSizeSummary(LevelSummaryStorage* scratch) const;
|
|
||||||
|
|
||||||
// Return a human-readable short (single-line) summary of files
|
|
||||||
// in a specified level. Uses *scratch as backing store.
|
|
||||||
const char* LevelFileSummary(Version* version,
|
|
||||||
FileSummaryStorage* scratch,
|
|
||||||
int level) const;
|
|
||||||
|
|
||||||
// Return the size of the current manifest file
|
// Return the size of the current manifest file
|
||||||
const uint64_t ManifestFileSize() { return manifest_file_size_; }
|
uint64_t ManifestFileSize() const { return manifest_file_size_; }
|
||||||
|
|
||||||
// For the specfied level, pick a compaction.
|
|
||||||
// Returns nullptr if there is no compaction to be done.
|
|
||||||
// If level is 0 and there is already a compaction on that level, this
|
|
||||||
// function will return nullptr.
|
|
||||||
Compaction* PickCompactionBySize(int level, double score);
|
|
||||||
|
|
||||||
// Pick files to compact in Universal mode
|
|
||||||
Compaction* PickCompactionUniversal(int level, double score);
|
|
||||||
|
|
||||||
// Pick Universal compaction to limit read amplification
|
|
||||||
Compaction* PickCompactionUniversalReadAmp(int level, double score,
|
|
||||||
unsigned int ratio, unsigned int num_files);
|
|
||||||
|
|
||||||
// Pick Universal compaction to limit space amplification.
|
|
||||||
Compaction* PickCompactionUniversalSizeAmp(int level, double score);
|
|
||||||
|
|
||||||
// Free up the files that were participated in a compaction
|
|
||||||
void ReleaseCompactionFiles(Compaction* c, Status status);
|
|
||||||
|
|
||||||
// verify that the files that we started with for a compaction
|
// verify that the files that we started with for a compaction
|
||||||
// still exist in the current version and in the same original level.
|
// still exist in the current version and in the same original level.
|
||||||
@ -464,20 +470,12 @@ class VersionSet {
|
|||||||
// pick the same files to compact.
|
// pick the same files to compact.
|
||||||
bool VerifyCompactionFileConsistency(Compaction* c);
|
bool VerifyCompactionFileConsistency(Compaction* c);
|
||||||
|
|
||||||
// used to sort files by size
|
double MaxBytesForLevel(int level);
|
||||||
typedef struct fsize {
|
|
||||||
int index;
|
|
||||||
FileMetaData* file;
|
|
||||||
} Fsize;
|
|
||||||
|
|
||||||
// Sort all files for this version based on their file size and
|
|
||||||
// record results in files_by_size_. The largest files are listed first.
|
|
||||||
void UpdateFilesBySize(Version *v);
|
|
||||||
|
|
||||||
// Get the max file size in a given level.
|
// Get the max file size in a given level.
|
||||||
uint64_t MaxFileSizeForLevel(int level);
|
uint64_t MaxFileSizeForLevel(int level);
|
||||||
|
|
||||||
double MaxBytesForLevel(int level);
|
void ReleaseCompactionFiles(Compaction* c, Status status);
|
||||||
|
|
||||||
Status GetMetadataForFile(
|
Status GetMetadataForFile(
|
||||||
uint64_t number, int *filelevel, FileMetaData *metadata);
|
uint64_t number, int *filelevel, FileMetaData *metadata);
|
||||||
@ -503,23 +501,6 @@ class VersionSet {
|
|||||||
friend class Compaction;
|
friend class Compaction;
|
||||||
friend class Version;
|
friend class Version;
|
||||||
|
|
||||||
void Init(int num_levels);
|
|
||||||
|
|
||||||
void Finalize(Version* v, std::vector<uint64_t>&);
|
|
||||||
|
|
||||||
void GetRange(const std::vector<FileMetaData*>& inputs,
|
|
||||||
InternalKey* smallest,
|
|
||||||
InternalKey* largest);
|
|
||||||
|
|
||||||
void GetRange2(const std::vector<FileMetaData*>& inputs1,
|
|
||||||
const std::vector<FileMetaData*>& inputs2,
|
|
||||||
InternalKey* smallest,
|
|
||||||
InternalKey* largest);
|
|
||||||
|
|
||||||
void ExpandWhileOverlapping(Compaction* c);
|
|
||||||
|
|
||||||
void SetupOtherInputs(Compaction* c);
|
|
||||||
|
|
||||||
// Save current contents to *log
|
// Save current contents to *log
|
||||||
Status WriteSnapshot(log::Writer* log);
|
Status WriteSnapshot(log::Writer* log);
|
||||||
|
|
||||||
@ -527,10 +508,6 @@ class VersionSet {
|
|||||||
|
|
||||||
bool ManifestContains(const std::string& record) const;
|
bool ManifestContains(const std::string& record) const;
|
||||||
|
|
||||||
uint64_t ExpandedCompactionByteSizeLimit(int level);
|
|
||||||
|
|
||||||
uint64_t MaxGrandParentOverlapBytes(int level);
|
|
||||||
|
|
||||||
Env* const env_;
|
Env* const env_;
|
||||||
const std::string dbname_;
|
const std::string dbname_;
|
||||||
const Options* const options_;
|
const Options* const options_;
|
||||||
@ -547,18 +524,13 @@ class VersionSet {
|
|||||||
// Opened lazily
|
// Opened lazily
|
||||||
unique_ptr<log::Writer> descriptor_log_;
|
unique_ptr<log::Writer> descriptor_log_;
|
||||||
|
|
||||||
// Per-level key at which the next compaction at that level should start.
|
// A flag indicating whether we should delay writes because
|
||||||
// Either an empty string, or a valid InternalKey.
|
// we have too many level 0 files
|
||||||
std::string* compact_pointer_;
|
bool need_slowdown_for_num_level0_files_;
|
||||||
|
|
||||||
// Per-level target file size.
|
// An object that keeps all the compaction stats
|
||||||
uint64_t* max_file_size_;
|
// and picks the next compaction
|
||||||
|
std::unique_ptr<CompactionPicker> compaction_picker_;
|
||||||
// Per-level max bytes
|
|
||||||
uint64_t* level_max_bytes_;
|
|
||||||
|
|
||||||
// record all the ongoing compactions for all levels
|
|
||||||
std::vector<std::set<Compaction*> > compactions_in_progress_;
|
|
||||||
|
|
||||||
// generates a increasing version number for every new version
|
// generates a increasing version number for every new version
|
||||||
uint64_t current_version_number_;
|
uint64_t current_version_number_;
|
||||||
@ -566,7 +538,7 @@ class VersionSet {
|
|||||||
// Queue of writers to the manifest file
|
// Queue of writers to the manifest file
|
||||||
std::deque<ManifestWriter*> manifest_writers_;
|
std::deque<ManifestWriter*> manifest_writers_;
|
||||||
|
|
||||||
// size of manifest file
|
// Current size of manifest file
|
||||||
uint64_t manifest_file_size_;
|
uint64_t manifest_file_size_;
|
||||||
|
|
||||||
std::vector<FileMetaData*> obsolete_files_;
|
std::vector<FileMetaData*> obsolete_files_;
|
||||||
@ -582,138 +554,8 @@ class VersionSet {
|
|||||||
VersionSet(const VersionSet&);
|
VersionSet(const VersionSet&);
|
||||||
void operator=(const VersionSet&);
|
void operator=(const VersionSet&);
|
||||||
|
|
||||||
// Return the total amount of data that is undergoing
|
|
||||||
// compactions per level
|
|
||||||
void SizeBeingCompacted(std::vector<uint64_t>&);
|
|
||||||
|
|
||||||
// Returns true if any one of the parent files are being compacted
|
|
||||||
bool ParentRangeInCompaction(const InternalKey* smallest,
|
|
||||||
const InternalKey* largest, int level, int* index);
|
|
||||||
|
|
||||||
// Returns true if any one of the specified files are being compacted
|
|
||||||
bool FilesInCompaction(std::vector<FileMetaData*>& files);
|
|
||||||
|
|
||||||
void LogAndApplyHelper(Builder*b, Version* v,
|
void LogAndApplyHelper(Builder*b, Version* v,
|
||||||
VersionEdit* edit, port::Mutex* mu);
|
VersionEdit* edit, port::Mutex* mu);
|
||||||
};
|
};
|
||||||
|
|
||||||
// A Compaction encapsulates information about a compaction.
|
|
||||||
class Compaction {
|
|
||||||
public:
|
|
||||||
~Compaction();
|
|
||||||
|
|
||||||
// Return the level that is being compacted. Inputs from "level"
|
|
||||||
// will be merged.
|
|
||||||
int level() const { return level_; }
|
|
||||||
|
|
||||||
// Outputs will go to this level
|
|
||||||
int output_level() const { return out_level_; }
|
|
||||||
|
|
||||||
// Return the object that holds the edits to the descriptor done
|
|
||||||
// by this compaction.
|
|
||||||
VersionEdit* edit() { return edit_; }
|
|
||||||
|
|
||||||
// "which" must be either 0 or 1
|
|
||||||
int num_input_files(int which) const { return inputs_[which].size(); }
|
|
||||||
|
|
||||||
// Return the ith input file at "level()+which" ("which" must be 0 or 1).
|
|
||||||
FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
|
|
||||||
|
|
||||||
// Maximum size of files to build during this compaction.
|
|
||||||
uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
|
|
||||||
|
|
||||||
// Whether compression will be enabled for compaction outputs
|
|
||||||
bool enable_compression() const { return enable_compression_; }
|
|
||||||
|
|
||||||
// Is this a trivial compaction that can be implemented by just
|
|
||||||
// moving a single input file to the next level (no merging or splitting)
|
|
||||||
bool IsTrivialMove() const;
|
|
||||||
|
|
||||||
// Add all inputs to this compaction as delete operations to *edit.
|
|
||||||
void AddInputDeletions(VersionEdit* edit);
|
|
||||||
|
|
||||||
// Returns true if the information we have available guarantees that
|
|
||||||
// the compaction is producing data in "level+1" for which no data exists
|
|
||||||
// in levels greater than "level+1".
|
|
||||||
bool IsBaseLevelForKey(const Slice& user_key);
|
|
||||||
|
|
||||||
// Returns true iff we should stop building the current output
|
|
||||||
// before processing "internal_key".
|
|
||||||
bool ShouldStopBefore(const Slice& internal_key);
|
|
||||||
|
|
||||||
// Release the input version for the compaction, once the compaction
|
|
||||||
// is successful.
|
|
||||||
void ReleaseInputs();
|
|
||||||
|
|
||||||
void Summary(char* output, int len);
|
|
||||||
|
|
||||||
// Return the score that was used to pick this compaction run.
|
|
||||||
double score() const { return score_; }
|
|
||||||
|
|
||||||
// Is this compaction creating a file in the bottom most level?
|
|
||||||
bool BottomMostLevel() { return bottommost_level_; }
|
|
||||||
|
|
||||||
// Does this compaction include all sst files?
|
|
||||||
bool IsFullCompaction() { return is_full_compaction_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
friend class Version;
|
|
||||||
friend class VersionSet;
|
|
||||||
|
|
||||||
Compaction(int level,
|
|
||||||
int out_level,
|
|
||||||
uint64_t target_file_size,
|
|
||||||
uint64_t max_grandparent_overlap_bytes,
|
|
||||||
int number_levels,
|
|
||||||
Version* input_version,
|
|
||||||
bool seek_compaction = false,
|
|
||||||
bool enable_compression = true);
|
|
||||||
|
|
||||||
int level_;
|
|
||||||
int out_level_; // levels to which output files are stored
|
|
||||||
uint64_t max_output_file_size_;
|
|
||||||
uint64_t maxGrandParentOverlapBytes_;
|
|
||||||
Version* input_version_;
|
|
||||||
VersionEdit* edit_;
|
|
||||||
int number_levels_;
|
|
||||||
|
|
||||||
bool seek_compaction_;
|
|
||||||
bool enable_compression_;
|
|
||||||
|
|
||||||
// Each compaction reads inputs from "level_" and "level_+1"
|
|
||||||
std::vector<FileMetaData*> inputs_[2]; // The two sets of inputs
|
|
||||||
|
|
||||||
// State used to check for number of of overlapping grandparent files
|
|
||||||
// (parent == level_ + 1, grandparent == level_ + 2)
|
|
||||||
std::vector<FileMetaData*> grandparents_;
|
|
||||||
size_t grandparent_index_; // Index in grandparent_starts_
|
|
||||||
bool seen_key_; // Some output key has been seen
|
|
||||||
uint64_t overlapped_bytes_; // Bytes of overlap between current output
|
|
||||||
// and grandparent files
|
|
||||||
int base_index_; // index of the file in files_[level_]
|
|
||||||
int parent_index_; // index of some file with same range in files_[level_+1]
|
|
||||||
double score_; // score that was used to pick this compaction.
|
|
||||||
|
|
||||||
// Is this compaction creating a file in the bottom most level?
|
|
||||||
bool bottommost_level_;
|
|
||||||
// Does this compaction include all sst files?
|
|
||||||
bool is_full_compaction_;
|
|
||||||
|
|
||||||
// level_ptrs_ holds indices into input_version_->levels_: our state
|
|
||||||
// is that we are positioned at one of the file ranges for each
|
|
||||||
// higher level than the ones involved in this compaction (i.e. for
|
|
||||||
// all L >= level_ + 2).
|
|
||||||
std::vector<size_t> level_ptrs_;
|
|
||||||
|
|
||||||
// mark (or clear) all files that are being compacted
|
|
||||||
void MarkFilesBeingCompacted(bool);
|
|
||||||
|
|
||||||
// Initialize whether compaction producing files at the bottommost level
|
|
||||||
void SetupBottomMostLevel(bool isManual);
|
|
||||||
|
|
||||||
// In case of compaction error, reset the nextIndex that is used
|
|
||||||
// to pick up the next file to be compacted from files_by_size_
|
|
||||||
void ResetNextCompactionIndex();
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -26,7 +26,7 @@ Status VersionSet::ReduceNumberOfLevels(int new_levels, port::Mutex* mu) {
|
|||||||
|
|
||||||
// TODO this only works for default column family now
|
// TODO this only works for default column family now
|
||||||
Version* current_version = column_family_data_.find(0)->second->current;
|
Version* current_version = column_family_data_.find(0)->second->current;
|
||||||
int current_levels = NumberLevels();
|
int current_levels = current_version->NumberLevels();
|
||||||
|
|
||||||
if (current_levels <= new_levels) {
|
if (current_levels <= new_levels) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
@ -37,7 +37,7 @@ Status VersionSet::ReduceNumberOfLevels(int new_levels, port::Mutex* mu) {
|
|||||||
int first_nonempty_level = -1;
|
int first_nonempty_level = -1;
|
||||||
int first_nonempty_level_filenum = 0;
|
int first_nonempty_level_filenum = 0;
|
||||||
for (int i = new_levels - 1; i < current_levels; i++) {
|
for (int i = new_levels - 1; i < current_levels; i++) {
|
||||||
int file_num = NumLevelFiles(i);
|
int file_num = current_version->NumLevelFiles(i);
|
||||||
if (file_num != 0) {
|
if (file_num != 0) {
|
||||||
if (first_nonempty_level < 0) {
|
if (first_nonempty_level < 0) {
|
||||||
first_nonempty_level = i;
|
first_nonempty_level = i;
|
||||||
@ -66,15 +66,12 @@ Status VersionSet::ReduceNumberOfLevels(int new_levels, port::Mutex* mu) {
|
|||||||
|
|
||||||
delete[] current_version->files_;
|
delete[] current_version->files_;
|
||||||
current_version->files_ = new_files_list;
|
current_version->files_ = new_files_list;
|
||||||
|
current_version->num_levels_ = new_levels;
|
||||||
|
|
||||||
delete[] compact_pointer_;
|
|
||||||
delete[] max_file_size_;
|
|
||||||
delete[] level_max_bytes_;
|
|
||||||
num_levels_ = new_levels;
|
num_levels_ = new_levels;
|
||||||
compact_pointer_ = new std::string[new_levels];
|
compaction_picker_->ReduceNumberOfLevels(new_levels);
|
||||||
Init(new_levels);
|
VersionEdit ve;
|
||||||
VersionEdit ve(new_levels);
|
st = LogAndApply(&ve, mu, true);
|
||||||
st = LogAndApply(&ve , mu, true);
|
|
||||||
return st;
|
return st;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@
|
|||||||
#include "db/snapshot.h"
|
#include "db/snapshot.h"
|
||||||
#include "db/write_batch_internal.h"
|
#include "db/write_batch_internal.h"
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/statistics_imp.h"
|
#include "util/statistics.h"
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
@ -39,7 +39,8 @@ namespace rocksdb {
|
|||||||
// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
|
// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
|
||||||
static const size_t kHeader = 12;
|
static const size_t kHeader = 12;
|
||||||
|
|
||||||
WriteBatch::WriteBatch() {
|
WriteBatch::WriteBatch(size_t reserved_bytes) {
|
||||||
|
rep_.reserve((reserved_bytes > kHeader) ? reserved_bytes : kHeader);
|
||||||
Clear();
|
Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,10 +22,11 @@ namespace rocksdb {
|
|||||||
static std::string PrintContents(WriteBatch* b) {
|
static std::string PrintContents(WriteBatch* b) {
|
||||||
InternalKeyComparator cmp(BytewiseComparator());
|
InternalKeyComparator cmp(BytewiseComparator());
|
||||||
auto factory = std::make_shared<SkipListFactory>();
|
auto factory = std::make_shared<SkipListFactory>();
|
||||||
MemTable* mem = new MemTable(cmp, factory.get());
|
Options options;
|
||||||
|
options.memtable_factory = factory;
|
||||||
|
MemTable* mem = new MemTable(cmp, options);
|
||||||
mem->Ref();
|
mem->Ref();
|
||||||
std::string state;
|
std::string state;
|
||||||
Options options;
|
|
||||||
Status s = WriteBatchInternal::InsertInto(b, mem, &options);
|
Status s = WriteBatchInternal::InsertInto(b, mem, &options);
|
||||||
int count = 0;
|
int count = 0;
|
||||||
Iterator* iter = mem->NewIterator();
|
Iterator* iter = mem->NewIterator();
|
||||||
|
@ -291,6 +291,7 @@ class DB {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Compact the underlying storage for the key range [*begin,*end].
|
// Compact the underlying storage for the key range [*begin,*end].
|
||||||
|
// The actual compaction interval might be superset of [*begin, *end].
|
||||||
// In particular, deleted and overwritten versions are discarded,
|
// In particular, deleted and overwritten versions are discarded,
|
||||||
// and the data is rearranged to reduce the cost of operations
|
// and the data is rearranged to reduce the cost of operations
|
||||||
// needed to access the data. This operation should typically only
|
// needed to access the data. This operation should typically only
|
||||||
|
@ -111,27 +111,23 @@ class MemTableRep {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Return an iterator over the keys in this representation.
|
// Return an iterator over the keys in this representation.
|
||||||
virtual std::shared_ptr<Iterator> GetIterator() = 0;
|
virtual Iterator* GetIterator() = 0;
|
||||||
|
|
||||||
// Return an iterator over at least the keys with the specified user key. The
|
// Return an iterator over at least the keys with the specified user key. The
|
||||||
// iterator may also allow access to other keys, but doesn't have to. Default:
|
// iterator may also allow access to other keys, but doesn't have to. Default:
|
||||||
// GetIterator().
|
// GetIterator().
|
||||||
virtual std::shared_ptr<Iterator> GetIterator(const Slice& user_key) {
|
virtual Iterator* GetIterator(const Slice& user_key) { return GetIterator(); }
|
||||||
return GetIterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return an iterator over at least the keys with the specified prefix. The
|
// Return an iterator over at least the keys with the specified prefix. The
|
||||||
// iterator may also allow access to other keys, but doesn't have to. Default:
|
// iterator may also allow access to other keys, but doesn't have to. Default:
|
||||||
// GetIterator().
|
// GetIterator().
|
||||||
virtual std::shared_ptr<Iterator> GetPrefixIterator(const Slice& prefix) {
|
virtual Iterator* GetPrefixIterator(const Slice& prefix) {
|
||||||
return GetIterator();
|
return GetIterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return an iterator that has a special Seek semantics. The result of
|
// Return an iterator that has a special Seek semantics. The result of
|
||||||
// a Seek might only include keys with the same prefix as the target key.
|
// a Seek might only include keys with the same prefix as the target key.
|
||||||
virtual std::shared_ptr<Iterator> GetDynamicPrefixIterator() {
|
virtual Iterator* GetDynamicPrefixIterator() { return GetIterator(); }
|
||||||
return GetIterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// When *key is an internal key concatenated with the value, returns the
|
// When *key is an internal key concatenated with the value, returns the
|
||||||
@ -144,8 +140,8 @@ class MemTableRep {
|
|||||||
class MemTableRepFactory {
|
class MemTableRepFactory {
|
||||||
public:
|
public:
|
||||||
virtual ~MemTableRepFactory() { };
|
virtual ~MemTableRepFactory() { };
|
||||||
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
|
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
|
||||||
MemTableRep::KeyComparator&, Arena*) = 0;
|
Arena*) = 0;
|
||||||
virtual const char* Name() const = 0;
|
virtual const char* Name() const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -161,8 +157,8 @@ class VectorRepFactory : public MemTableRepFactory {
|
|||||||
const size_t count_;
|
const size_t count_;
|
||||||
public:
|
public:
|
||||||
explicit VectorRepFactory(size_t count = 0) : count_(count) { }
|
explicit VectorRepFactory(size_t count = 0) : count_(count) { }
|
||||||
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
|
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
|
||||||
MemTableRep::KeyComparator&, Arena*) override;
|
Arena*) override;
|
||||||
virtual const char* Name() const override {
|
virtual const char* Name() const override {
|
||||||
return "VectorRepFactory";
|
return "VectorRepFactory";
|
||||||
}
|
}
|
||||||
@ -171,8 +167,8 @@ public:
|
|||||||
// This uses a skip list to store keys. It is the default.
|
// This uses a skip list to store keys. It is the default.
|
||||||
class SkipListFactory : public MemTableRepFactory {
|
class SkipListFactory : public MemTableRepFactory {
|
||||||
public:
|
public:
|
||||||
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
|
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator&,
|
||||||
MemTableRep::KeyComparator&, Arena*) override;
|
Arena*) override;
|
||||||
virtual const char* Name() const override {
|
virtual const char* Name() const override {
|
||||||
return "SkipListFactory";
|
return "SkipListFactory";
|
||||||
}
|
}
|
||||||
|
@ -242,53 +242,10 @@ struct HistogramData {
|
|||||||
double standard_deviation;
|
double standard_deviation;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class Histogram {
|
|
||||||
public:
|
|
||||||
// clear's the histogram
|
|
||||||
virtual void Clear() = 0;
|
|
||||||
virtual ~Histogram();
|
|
||||||
// Add a value to be recorded in the histogram.
|
|
||||||
virtual void Add(uint64_t value) = 0;
|
|
||||||
|
|
||||||
virtual std::string ToString() const = 0;
|
|
||||||
|
|
||||||
// Get statistics
|
|
||||||
virtual double Median() const = 0;
|
|
||||||
virtual double Percentile(double p) const = 0;
|
|
||||||
virtual double Average() const = 0;
|
|
||||||
virtual double StandardDeviation() const = 0;
|
|
||||||
virtual void Data(HistogramData * const data) const = 0;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A dumb ticker which keeps incrementing through its life time.
|
|
||||||
* Thread safe. Locking managed by implementation of this interface.
|
|
||||||
*/
|
|
||||||
class Ticker {
|
|
||||||
public:
|
|
||||||
Ticker() : count_(0) { }
|
|
||||||
|
|
||||||
inline void setTickerCount(uint64_t count) {
|
|
||||||
count_ = count;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void recordTick(int count = 1) {
|
|
||||||
count_ += count;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uint64_t getCount() {
|
|
||||||
return count_;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::atomic_uint_fast64_t count_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Analyze the performance of a db
|
// Analyze the performance of a db
|
||||||
class Statistics {
|
class Statistics {
|
||||||
public:
|
public:
|
||||||
|
virtual ~Statistics() {}
|
||||||
|
|
||||||
virtual long getTickerCount(Tickers tickerType) = 0;
|
virtual long getTickerCount(Tickers tickerType) = 0;
|
||||||
virtual void recordTick(Tickers tickerType, uint64_t count = 0) = 0;
|
virtual void recordTick(Tickers tickerType, uint64_t count = 0) = 0;
|
||||||
|
@ -36,7 +36,7 @@ struct SliceParts;
|
|||||||
|
|
||||||
class WriteBatch {
|
class WriteBatch {
|
||||||
public:
|
public:
|
||||||
WriteBatch();
|
explicit WriteBatch(size_t reserved_bytes = 0);
|
||||||
~WriteBatch();
|
~WriteBatch();
|
||||||
|
|
||||||
// Store the mapping "key->value" in the database.
|
// Store the mapping "key->value" in the database.
|
||||||
@ -122,7 +122,10 @@ class WriteBatch {
|
|||||||
Status Iterate(Handler* handler) const;
|
Status Iterate(Handler* handler) const;
|
||||||
|
|
||||||
// Retrieve the serialized version of this batch.
|
// Retrieve the serialized version of this batch.
|
||||||
std::string Data() { return rep_; }
|
const std::string& Data() const { return rep_; }
|
||||||
|
|
||||||
|
// Retrieve data size of the batch.
|
||||||
|
size_t GetDataSize() const { return rep_.size(); }
|
||||||
|
|
||||||
// Returns the number of updates in the batch
|
// Returns the number of updates in the batch
|
||||||
int Count() const;
|
int Count() const;
|
||||||
|
@ -12,7 +12,8 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "db/db_statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
|
#include "util/statistics.h"
|
||||||
#include "db/memtable.h"
|
#include "db/memtable.h"
|
||||||
#include "db/write_batch_internal.h"
|
#include "db/write_batch_internal.h"
|
||||||
#include "rocksdb/cache.h"
|
#include "rocksdb/cache.h"
|
||||||
@ -370,7 +371,9 @@ class MemTableConstructor: public Constructor {
|
|||||||
: Constructor(cmp),
|
: Constructor(cmp),
|
||||||
internal_comparator_(cmp),
|
internal_comparator_(cmp),
|
||||||
table_factory_(new SkipListFactory) {
|
table_factory_(new SkipListFactory) {
|
||||||
memtable_ = new MemTable(internal_comparator_, table_factory_.get());
|
Options options;
|
||||||
|
options.memtable_factory = table_factory_;
|
||||||
|
memtable_ = new MemTable(internal_comparator_, options);
|
||||||
memtable_->Ref();
|
memtable_->Ref();
|
||||||
}
|
}
|
||||||
~MemTableConstructor() {
|
~MemTableConstructor() {
|
||||||
@ -378,7 +381,9 @@ class MemTableConstructor: public Constructor {
|
|||||||
}
|
}
|
||||||
virtual Status FinishImpl(const Options& options, const KVMap& data) {
|
virtual Status FinishImpl(const Options& options, const KVMap& data) {
|
||||||
delete memtable_->Unref();
|
delete memtable_->Unref();
|
||||||
memtable_ = new MemTable(internal_comparator_, table_factory_.get());
|
Options memtable_options;
|
||||||
|
memtable_options.memtable_factory = table_factory_;
|
||||||
|
memtable_ = new MemTable(internal_comparator_, memtable_options);
|
||||||
memtable_->Ref();
|
memtable_->Ref();
|
||||||
int seq = 1;
|
int seq = 1;
|
||||||
for (KVMap::const_iterator it = data.begin();
|
for (KVMap::const_iterator it = data.begin();
|
||||||
@ -931,18 +936,12 @@ TEST(TableTest, NumBlockStat) {
|
|||||||
class BlockCacheProperties {
|
class BlockCacheProperties {
|
||||||
public:
|
public:
|
||||||
explicit BlockCacheProperties(Statistics* statistics) {
|
explicit BlockCacheProperties(Statistics* statistics) {
|
||||||
block_cache_miss =
|
block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS);
|
||||||
statistics->getTickerCount(BLOCK_CACHE_MISS);
|
block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT);
|
||||||
block_cache_hit =
|
index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
|
||||||
statistics->getTickerCount(BLOCK_CACHE_HIT);
|
index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
|
||||||
index_block_cache_miss =
|
data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
|
||||||
statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
|
data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
|
||||||
index_block_cache_hit =
|
|
||||||
statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
|
|
||||||
data_block_cache_miss =
|
|
||||||
statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
|
|
||||||
data_block_cache_hit =
|
|
||||||
statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the fetched props matches the expected ones.
|
// Check if the fetched props matches the expected ones.
|
||||||
@ -1268,10 +1267,11 @@ class MemTableTest { };
|
|||||||
TEST(MemTableTest, Simple) {
|
TEST(MemTableTest, Simple) {
|
||||||
InternalKeyComparator cmp(BytewiseComparator());
|
InternalKeyComparator cmp(BytewiseComparator());
|
||||||
auto table_factory = std::make_shared<SkipListFactory>();
|
auto table_factory = std::make_shared<SkipListFactory>();
|
||||||
MemTable* memtable = new MemTable(cmp, table_factory.get());
|
Options options;
|
||||||
|
options.memtable_factory = table_factory;
|
||||||
|
MemTable* memtable = new MemTable(cmp, options);
|
||||||
memtable->Ref();
|
memtable->Ref();
|
||||||
WriteBatch batch;
|
WriteBatch batch;
|
||||||
Options options;
|
|
||||||
WriteBatchInternal::SetSequence(&batch, 100);
|
WriteBatchInternal::SetSequence(&batch, 100);
|
||||||
batch.Put(std::string("k1"), std::string("v1"));
|
batch.Put(std::string("k1"), std::string("v1"));
|
||||||
batch.Put(std::string("k2"), std::string("v2"));
|
batch.Put(std::string("k2"), std::string("v2"));
|
||||||
|
@ -26,7 +26,7 @@
|
|||||||
#include <gflags/gflags.h>
|
#include <gflags/gflags.h>
|
||||||
#include "db/db_impl.h"
|
#include "db/db_impl.h"
|
||||||
#include "db/version_set.h"
|
#include "db/version_set.h"
|
||||||
#include "db/db_statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
#include "rocksdb/cache.h"
|
#include "rocksdb/cache.h"
|
||||||
#include "utilities/utility_db.h"
|
#include "utilities/utility_db.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
|
@ -31,17 +31,15 @@ class HashSkipListRep : public MemTableRep {
|
|||||||
|
|
||||||
virtual ~HashSkipListRep();
|
virtual ~HashSkipListRep();
|
||||||
|
|
||||||
virtual std::shared_ptr<MemTableRep::Iterator> GetIterator() override;
|
virtual MemTableRep::Iterator* GetIterator() override;
|
||||||
|
|
||||||
virtual std::shared_ptr<MemTableRep::Iterator> GetIterator(
|
virtual MemTableRep::Iterator* GetIterator(const Slice& slice) override;
|
||||||
const Slice& slice) override;
|
|
||||||
|
|
||||||
virtual std::shared_ptr<MemTableRep::Iterator> GetPrefixIterator(
|
virtual MemTableRep::Iterator* GetPrefixIterator(const Slice& prefix)
|
||||||
const Slice& prefix) override;
|
|
||||||
|
|
||||||
virtual std::shared_ptr<MemTableRep::Iterator> GetDynamicPrefixIterator()
|
|
||||||
override;
|
override;
|
||||||
|
|
||||||
|
virtual MemTableRep::Iterator* GetDynamicPrefixIterator() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class DynamicIterator;
|
friend class DynamicIterator;
|
||||||
typedef SkipList<const char*, MemTableRep::KeyComparator&> Bucket;
|
typedef SkipList<const char*, MemTableRep::KeyComparator&> Bucket;
|
||||||
@ -208,18 +206,15 @@ class HashSkipListRep : public MemTableRep {
|
|||||||
virtual void SeekToLast() { }
|
virtual void SeekToLast() { }
|
||||||
private:
|
private:
|
||||||
};
|
};
|
||||||
|
|
||||||
std::shared_ptr<EmptyIterator> empty_iterator_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
HashSkipListRep::HashSkipListRep(MemTableRep::KeyComparator& compare,
|
HashSkipListRep::HashSkipListRep(MemTableRep::KeyComparator& compare,
|
||||||
Arena* arena, const SliceTransform* transform, size_t bucket_size)
|
Arena* arena, const SliceTransform* transform,
|
||||||
: bucket_size_(bucket_size),
|
size_t bucket_size)
|
||||||
transform_(transform),
|
: bucket_size_(bucket_size),
|
||||||
compare_(compare),
|
transform_(transform),
|
||||||
arena_(arena),
|
compare_(compare),
|
||||||
empty_iterator_(std::make_shared<EmptyIterator>()) {
|
arena_(arena) {
|
||||||
|
|
||||||
buckets_ = new port::AtomicPointer[bucket_size];
|
buckets_ = new port::AtomicPointer[bucket_size];
|
||||||
|
|
||||||
for (size_t i = 0; i < bucket_size_; ++i) {
|
for (size_t i = 0; i < bucket_size_; ++i) {
|
||||||
@ -263,7 +258,7 @@ size_t HashSkipListRep::ApproximateMemoryUsage() {
|
|||||||
return sizeof(buckets_);
|
return sizeof(buckets_);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<MemTableRep::Iterator> HashSkipListRep::GetIterator() {
|
MemTableRep::Iterator* HashSkipListRep::GetIterator() {
|
||||||
auto list = new Bucket(compare_, arena_);
|
auto list = new Bucket(compare_, arena_);
|
||||||
for (size_t i = 0; i < bucket_size_; ++i) {
|
for (size_t i = 0; i < bucket_size_; ++i) {
|
||||||
auto bucket = GetBucket(i);
|
auto bucket = GetBucket(i);
|
||||||
@ -274,35 +269,30 @@ std::shared_ptr<MemTableRep::Iterator> HashSkipListRep::GetIterator() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return std::make_shared<Iterator>(list);
|
return new Iterator(list);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<MemTableRep::Iterator> HashSkipListRep::GetPrefixIterator(
|
MemTableRep::Iterator* HashSkipListRep::GetPrefixIterator(const Slice& prefix) {
|
||||||
const Slice& prefix) {
|
|
||||||
auto bucket = GetBucket(prefix);
|
auto bucket = GetBucket(prefix);
|
||||||
if (bucket == nullptr) {
|
if (bucket == nullptr) {
|
||||||
return empty_iterator_;
|
return new EmptyIterator();
|
||||||
}
|
}
|
||||||
return std::make_shared<Iterator>(bucket, false);
|
return new Iterator(bucket, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<MemTableRep::Iterator> HashSkipListRep::GetIterator(
|
MemTableRep::Iterator* HashSkipListRep::GetIterator(const Slice& slice) {
|
||||||
const Slice& slice) {
|
|
||||||
return GetPrefixIterator(transform_->Transform(slice));
|
return GetPrefixIterator(transform_->Transform(slice));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<MemTableRep::Iterator>
|
MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator() {
|
||||||
HashSkipListRep::GetDynamicPrefixIterator() {
|
return new DynamicIterator(*this);
|
||||||
return std::make_shared<DynamicIterator>(*this);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // anon namespace
|
} // anon namespace
|
||||||
|
|
||||||
std::shared_ptr<MemTableRep>
|
MemTableRep* HashSkipListRepFactory::CreateMemTableRep(
|
||||||
HashSkipListRepFactory::CreateMemTableRep(MemTableRep::KeyComparator &compare,
|
MemTableRep::KeyComparator& compare, Arena* arena) {
|
||||||
Arena *arena) {
|
return new HashSkipListRep(compare, arena, transform_, bucket_count_);
|
||||||
return std::make_shared<HashSkipListRep>(compare, arena, transform_,
|
|
||||||
bucket_count_);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MemTableRepFactory* NewHashSkipListRepFactory(
|
MemTableRepFactory* NewHashSkipListRepFactory(
|
||||||
|
@ -21,8 +21,8 @@ class HashSkipListRepFactory : public MemTableRepFactory {
|
|||||||
|
|
||||||
virtual ~HashSkipListRepFactory() { delete transform_; }
|
virtual ~HashSkipListRepFactory() { delete transform_; }
|
||||||
|
|
||||||
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
|
virtual MemTableRep* CreateMemTableRep(MemTableRep::KeyComparator& compare,
|
||||||
MemTableRep::KeyComparator& compare, Arena* arena) override;
|
Arena* arena) override;
|
||||||
|
|
||||||
virtual const char* Name() const override {
|
virtual const char* Name() const override {
|
||||||
return "HashSkipListRepFactory";
|
return "HashSkipListRepFactory";
|
||||||
|
@ -16,27 +16,38 @@
|
|||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
HistogramBucketMapper::HistogramBucketMapper() :
|
HistogramBucketMapper::HistogramBucketMapper()
|
||||||
// Add newer bucket index here.
|
:
|
||||||
// Should be alwyas added in sorted order.
|
// Add newer bucket index here.
|
||||||
bucketValues_({
|
// Should be alwyas added in sorted order.
|
||||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45,
|
// If you change this, you also need to change
|
||||||
50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450,
|
// size of array buckets_ in HistogramImpl
|
||||||
500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000,
|
bucketValues_(
|
||||||
3500, 4000, 4500, 5000, 6000, 7000, 8000, 9000, 10000, 12000, 14000,
|
{1, 2, 3, 4, 5, 6,
|
||||||
16000, 18000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 60000,
|
7, 8, 9, 10, 12, 14,
|
||||||
70000, 80000, 90000, 100000, 120000, 140000, 160000, 180000, 200000,
|
16, 18, 20, 25, 30, 35,
|
||||||
250000, 300000, 350000, 400000, 450000, 500000, 600000, 700000, 800000,
|
40, 45, 50, 60, 70, 80,
|
||||||
900000, 1000000, 1200000, 1400000, 1600000, 1800000, 2000000, 2500000,
|
90, 100, 120, 140, 160, 180,
|
||||||
3000000, 3500000, 4000000, 4500000, 5000000, 6000000, 7000000, 8000000,
|
200, 250, 300, 350, 400, 450,
|
||||||
9000000, 10000000, 12000000, 14000000, 16000000, 18000000, 20000000,
|
500, 600, 700, 800, 900, 1000,
|
||||||
25000000, 30000000, 35000000, 40000000, 45000000, 50000000, 60000000,
|
1200, 1400, 1600, 1800, 2000, 2500,
|
||||||
70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000,
|
3000, 3500, 4000, 4500, 5000, 6000,
|
||||||
180000000, 200000000, 250000000, 300000000, 350000000, 400000000,
|
7000, 8000, 9000, 10000, 12000, 14000,
|
||||||
450000000, 500000000, 600000000, 700000000, 800000000, 900000000,
|
16000, 18000, 20000, 25000, 30000, 35000,
|
||||||
1000000000}),
|
40000, 45000, 50000, 60000, 70000, 80000,
|
||||||
maxBucketValue_(bucketValues_.back()),
|
90000, 100000, 120000, 140000, 160000, 180000,
|
||||||
minBucketValue_(bucketValues_.front()) {
|
200000, 250000, 300000, 350000, 400000, 450000,
|
||||||
|
500000, 600000, 700000, 800000, 900000, 1000000,
|
||||||
|
1200000, 1400000, 1600000, 1800000, 2000000, 2500000,
|
||||||
|
3000000, 3500000, 4000000, 4500000, 5000000, 6000000,
|
||||||
|
7000000, 8000000, 9000000, 10000000, 12000000, 14000000,
|
||||||
|
16000000, 18000000, 20000000, 25000000, 30000000, 35000000,
|
||||||
|
40000000, 45000000, 50000000, 60000000, 70000000, 80000000,
|
||||||
|
90000000, 100000000, 120000000, 140000000, 160000000, 180000000,
|
||||||
|
200000000, 250000000, 300000000, 350000000, 400000000, 450000000,
|
||||||
|
500000000, 600000000, 700000000, 800000000, 900000000, 1000000000}),
|
||||||
|
maxBucketValue_(bucketValues_.back()),
|
||||||
|
minBucketValue_(bucketValues_.front()) {
|
||||||
for (size_t i =0; i < bucketValues_.size(); ++i) {
|
for (size_t i =0; i < bucketValues_.size(); ++i) {
|
||||||
valueIndexMap_[bucketValues_[i]] = i;
|
valueIndexMap_[bucketValues_[i]] = i;
|
||||||
}
|
}
|
||||||
@ -62,24 +73,17 @@ namespace {
|
|||||||
const HistogramBucketMapper bucketMapper;
|
const HistogramBucketMapper bucketMapper;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
HistogramImpl::HistogramImpl() :
|
|
||||||
min_(bucketMapper.LastValue()),
|
|
||||||
max_(0),
|
|
||||||
num_(0),
|
|
||||||
sum_(0),
|
|
||||||
sum_squares_(0),
|
|
||||||
buckets_(std::vector<uint64_t>(bucketMapper.BucketCount(), 0)) {}
|
|
||||||
|
|
||||||
void HistogramImpl::Clear() {
|
void HistogramImpl::Clear() {
|
||||||
min_ = bucketMapper.LastValue();
|
min_ = bucketMapper.LastValue();
|
||||||
max_ = 0;
|
max_ = 0;
|
||||||
num_ = 0;
|
num_ = 0;
|
||||||
sum_ = 0;
|
sum_ = 0;
|
||||||
sum_squares_ = 0;
|
sum_squares_ = 0;
|
||||||
buckets_.resize(bucketMapper.BucketCount(), 0);
|
memset(buckets_, 0, sizeof buckets_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool HistogramImpl::Empty() { return sum_squares_ == 0; }
|
||||||
|
|
||||||
void HistogramImpl::Add(uint64_t value) {
|
void HistogramImpl::Add(uint64_t value) {
|
||||||
const size_t index = bucketMapper.IndexForValue(value);
|
const size_t index = bucketMapper.IndexForValue(value);
|
||||||
buckets_[index] += 1;
|
buckets_[index] += 1;
|
||||||
|
@ -52,9 +52,8 @@ class HistogramBucketMapper {
|
|||||||
|
|
||||||
class HistogramImpl {
|
class HistogramImpl {
|
||||||
public:
|
public:
|
||||||
HistogramImpl();
|
|
||||||
virtual ~HistogramImpl() {}
|
|
||||||
virtual void Clear();
|
virtual void Clear();
|
||||||
|
virtual bool Empty();
|
||||||
virtual void Add(uint64_t value);
|
virtual void Add(uint64_t value);
|
||||||
void Merge(const HistogramImpl& other);
|
void Merge(const HistogramImpl& other);
|
||||||
|
|
||||||
@ -67,13 +66,14 @@ class HistogramImpl {
|
|||||||
virtual void Data(HistogramData * const data) const;
|
virtual void Data(HistogramData * const data) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
double min_;
|
// To be able to use HistogramImpl as thread local variable, its constructor
|
||||||
double max_;
|
// has to be static. That's why we're using manually values from BucketMapper
|
||||||
double num_;
|
double min_ = 1000000000; // this is BucketMapper:LastValue()
|
||||||
double sum_;
|
double max_ = 0;
|
||||||
double sum_squares_;
|
double num_ = 0;
|
||||||
std::vector<uint64_t> buckets_;
|
double sum_ = 0;
|
||||||
|
double sum_squares_ = 0;
|
||||||
|
uint64_t buckets_[138] = {0}; // this is BucketMapper::BucketCount()
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -1024,7 +1024,7 @@ Status ReduceDBLevelsCommand::GetOldNumOfLevels(Options& opt,
|
|||||||
}
|
}
|
||||||
int max = -1;
|
int max = -1;
|
||||||
for (int i = 0; i < versions.NumberLevels(); i++) {
|
for (int i = 0; i < versions.NumberLevels(); i++) {
|
||||||
if (versions.NumLevelFiles(i)) {
|
if (versions.current()->NumLevelFiles(i)) {
|
||||||
max = i;
|
max = i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9,9 +9,13 @@
|
|||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
|
#include "rocksdb/compaction_filter.h"
|
||||||
|
#include "rocksdb/slice.h"
|
||||||
#include "rocksdb/write_batch.h"
|
#include "rocksdb/write_batch.h"
|
||||||
#include "util/testharness.h"
|
#include "util/testharness.h"
|
||||||
|
|
||||||
|
using namespace rocksdb;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
const int kNumKeys = 1100000;
|
const int kNumKeys = 1100000;
|
||||||
@ -26,12 +30,71 @@ std::string Key2(int i) {
|
|||||||
return Key1(i) + "_xxx";
|
return Key1(i) + "_xxx";
|
||||||
}
|
}
|
||||||
|
|
||||||
class ManualCompactionTest { };
|
class ManualCompactionTest {
|
||||||
|
public:
|
||||||
|
ManualCompactionTest() {
|
||||||
|
// Get rid of any state from an old run.
|
||||||
|
dbname_ = rocksdb::test::TmpDir() + "/rocksdb_cbug_test";
|
||||||
|
DestroyDB(dbname_, rocksdb::Options());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string dbname_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class DestroyAllCompactionFilter : public CompactionFilter {
|
||||||
|
public:
|
||||||
|
DestroyAllCompactionFilter() {}
|
||||||
|
|
||||||
|
virtual bool Filter(int level,
|
||||||
|
const Slice& key,
|
||||||
|
const Slice& existing_value,
|
||||||
|
std::string* new_value,
|
||||||
|
bool* value_changed) const {
|
||||||
|
return existing_value.ToString() == "destroy";
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual const char* Name() const {
|
||||||
|
return "DestroyAllCompactionFilter";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST(ManualCompactionTest, CompactTouchesAllKeys) {
|
||||||
|
for (int iter = 0; iter < 2; ++iter) {
|
||||||
|
DB* db;
|
||||||
|
Options options;
|
||||||
|
if (iter == 0) { // level compaction
|
||||||
|
options.num_levels = 3;
|
||||||
|
options.compaction_style = kCompactionStyleLevel;
|
||||||
|
} else { // universal compaction
|
||||||
|
options.compaction_style = kCompactionStyleUniversal;
|
||||||
|
}
|
||||||
|
options.create_if_missing = true;
|
||||||
|
options.compression = rocksdb::kNoCompression;
|
||||||
|
options.compaction_filter = new DestroyAllCompactionFilter();
|
||||||
|
ASSERT_OK(DB::Open(options, dbname_, &db));
|
||||||
|
|
||||||
|
db->Put(WriteOptions(), Slice("key1"), Slice("destroy"));
|
||||||
|
db->Put(WriteOptions(), Slice("key2"), Slice("destroy"));
|
||||||
|
db->Put(WriteOptions(), Slice("key3"), Slice("value3"));
|
||||||
|
db->Put(WriteOptions(), Slice("key4"), Slice("destroy"));
|
||||||
|
|
||||||
|
Slice key4("key4");
|
||||||
|
db->CompactRange(nullptr, &key4);
|
||||||
|
Iterator* itr = db->NewIterator(ReadOptions());
|
||||||
|
itr->SeekToFirst();
|
||||||
|
ASSERT_TRUE(itr->Valid());
|
||||||
|
ASSERT_EQ("key3", itr->key().ToString());
|
||||||
|
itr->Next();
|
||||||
|
ASSERT_TRUE(!itr->Valid());
|
||||||
|
delete itr;
|
||||||
|
|
||||||
|
delete options.compaction_filter;
|
||||||
|
delete db;
|
||||||
|
DestroyDB(dbname_, options);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST(ManualCompactionTest, Test) {
|
TEST(ManualCompactionTest, Test) {
|
||||||
// Get rid of any state from an old run.
|
|
||||||
std::string dbpath = rocksdb::test::TmpDir() + "/rocksdb_cbug_test";
|
|
||||||
DestroyDB(dbpath, rocksdb::Options());
|
|
||||||
|
|
||||||
// Open database. Disable compression since it affects the creation
|
// Open database. Disable compression since it affects the creation
|
||||||
// of layers and the code below is trying to test against a very
|
// of layers and the code below is trying to test against a very
|
||||||
@ -40,7 +103,7 @@ TEST(ManualCompactionTest, Test) {
|
|||||||
rocksdb::Options db_options;
|
rocksdb::Options db_options;
|
||||||
db_options.create_if_missing = true;
|
db_options.create_if_missing = true;
|
||||||
db_options.compression = rocksdb::kNoCompression;
|
db_options.compression = rocksdb::kNoCompression;
|
||||||
ASSERT_OK(rocksdb::DB::Open(db_options, dbpath, &db));
|
ASSERT_OK(rocksdb::DB::Open(db_options, dbname_, &db));
|
||||||
|
|
||||||
// create first key range
|
// create first key range
|
||||||
rocksdb::WriteBatch batch;
|
rocksdb::WriteBatch batch;
|
||||||
@ -83,7 +146,7 @@ TEST(ManualCompactionTest, Test) {
|
|||||||
|
|
||||||
// close database
|
// close database
|
||||||
delete db;
|
delete db;
|
||||||
DestroyDB(dbpath, rocksdb::Options());
|
DestroyDB(dbname_, rocksdb::Options());
|
||||||
}
|
}
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
@ -90,15 +90,15 @@ public:
|
|||||||
// Unhide default implementations of GetIterator
|
// Unhide default implementations of GetIterator
|
||||||
using MemTableRep::GetIterator;
|
using MemTableRep::GetIterator;
|
||||||
|
|
||||||
virtual std::shared_ptr<MemTableRep::Iterator> GetIterator() override {
|
virtual MemTableRep::Iterator* GetIterator() override {
|
||||||
return std::make_shared<SkipListRep::Iterator>(&skip_list_);
|
return new SkipListRep::Iterator(&skip_list_);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<MemTableRep> SkipListFactory::CreateMemTableRep (
|
MemTableRep* SkipListFactory::CreateMemTableRep(
|
||||||
MemTableRep::KeyComparator& compare, Arena* arena) {
|
MemTableRep::KeyComparator& compare, Arena* arena) {
|
||||||
return std::shared_ptr<MemTableRep>(new SkipListRep(compare, arena));
|
return new SkipListRep(compare, arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -3,12 +3,48 @@
|
|||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
//
|
//
|
||||||
|
#include "util/statistics.h"
|
||||||
#include "rocksdb/statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
|
std::shared_ptr<Statistics> CreateDBStatistics() {
|
||||||
|
return std::make_shared<StatisticsImpl>();
|
||||||
|
}
|
||||||
|
|
||||||
|
StatisticsImpl::StatisticsImpl() {}
|
||||||
|
|
||||||
|
StatisticsImpl::~StatisticsImpl() {}
|
||||||
|
|
||||||
|
long StatisticsImpl::getTickerCount(Tickers tickerType) {
|
||||||
|
assert(tickerType < TICKER_ENUM_MAX);
|
||||||
|
return tickers_[tickerType];
|
||||||
|
}
|
||||||
|
|
||||||
|
void StatisticsImpl::setTickerCount(Tickers tickerType, uint64_t count) {
|
||||||
|
assert(tickerType < TICKER_ENUM_MAX);
|
||||||
|
tickers_[tickerType] = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
void StatisticsImpl::recordTick(Tickers tickerType, uint64_t count) {
|
||||||
|
assert(tickerType < TICKER_ENUM_MAX);
|
||||||
|
tickers_[tickerType] += count;
|
||||||
|
}
|
||||||
|
|
||||||
|
void StatisticsImpl::measureTime(Histograms histogramType, uint64_t value) {
|
||||||
|
assert(histogramType < HISTOGRAM_ENUM_MAX);
|
||||||
|
histograms_[histogramType].Add(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void StatisticsImpl::histogramData(Histograms histogramType,
|
||||||
|
HistogramData* const data) {
|
||||||
|
assert(histogramType < HISTOGRAM_ENUM_MAX);
|
||||||
|
histograms_[histogramType].Data(data);
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// a buffer size used for temp string buffers
|
// a buffer size used for temp string buffers
|
||||||
const int kBufferSize = 200;
|
const int kBufferSize = 200;
|
||||||
|
|
||||||
@ -32,11 +68,8 @@ std::string HistogramToString (
|
|||||||
return std::string(buffer);
|
return std::string(buffer);
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string TickerToString (
|
std::string TickerToString(Statistics* dbstats, const Tickers& ticker,
|
||||||
Statistics* dbstats,
|
const std::string& name) {
|
||||||
const Tickers& ticker,
|
|
||||||
const std::string& name) {
|
|
||||||
|
|
||||||
char buffer[kBufferSize];
|
char buffer[kBufferSize];
|
||||||
snprintf(buffer, kBufferSize, "%s COUNT : %ld\n",
|
snprintf(buffer, kBufferSize, "%s COUNT : %ld\n",
|
||||||
name.c_str(), dbstats->getTickerCount(ticker));
|
name.c_str(), dbstats->getTickerCount(ticker));
|
||||||
|
53
util/statistics.h
Normal file
53
util/statistics.h
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
#pragma once
|
||||||
|
#include "rocksdb/statistics.h"
|
||||||
|
#include "util/histogram.h"
|
||||||
|
#include "util/mutexlock.h"
|
||||||
|
|
||||||
|
#define UNLIKELY(val) (__builtin_expect((val), 0))
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
class StatisticsImpl : public Statistics {
|
||||||
|
public:
|
||||||
|
StatisticsImpl();
|
||||||
|
virtual ~StatisticsImpl();
|
||||||
|
|
||||||
|
virtual long getTickerCount(Tickers tickerType);
|
||||||
|
virtual void setTickerCount(Tickers tickerType, uint64_t count);
|
||||||
|
virtual void recordTick(Tickers tickerType, uint64_t count);
|
||||||
|
virtual void measureTime(Histograms histogramType, uint64_t value);
|
||||||
|
virtual void histogramData(Histograms histogramType,
|
||||||
|
HistogramData* const data);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::atomic_uint_fast64_t tickers_[TICKER_ENUM_MAX];
|
||||||
|
HistogramImpl histograms_[HISTOGRAM_ENUM_MAX];
|
||||||
|
};
|
||||||
|
|
||||||
|
// Utility functions
|
||||||
|
inline void MeasureTime(Statistics* statistics, Histograms histogramType,
|
||||||
|
uint64_t value) {
|
||||||
|
if (statistics) {
|
||||||
|
statistics->measureTime(histogramType, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void RecordTick(Statistics* statistics, Tickers ticker,
|
||||||
|
uint64_t count = 1) {
|
||||||
|
if (statistics) {
|
||||||
|
statistics->recordTick(ticker, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void SetTickerCount(Statistics* statistics, Tickers ticker,
|
||||||
|
uint64_t count) {
|
||||||
|
if (statistics) {
|
||||||
|
statistics->setTickerCount(ticker, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,32 +0,0 @@
|
|||||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under the BSD-style license found in the
|
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
|
||||||
//
|
|
||||||
#pragma once
|
|
||||||
#include "rocksdb/statistics.h"
|
|
||||||
|
|
||||||
namespace rocksdb {
|
|
||||||
|
|
||||||
// Utility functions
|
|
||||||
inline void RecordTick(Statistics* statistics,
|
|
||||||
Tickers ticker,
|
|
||||||
uint64_t count = 1) {
|
|
||||||
assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
|
|
||||||
assert(TickersNameMap.size() == TICKER_ENUM_MAX);
|
|
||||||
if (statistics) {
|
|
||||||
statistics->recordTick(ticker, count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void SetTickerCount(Statistics* statistics,
|
|
||||||
Tickers ticker,
|
|
||||||
uint64_t count) {
|
|
||||||
assert(HistogramsNameMap.size() == HISTOGRAM_ENUM_MAX);
|
|
||||||
assert(TickersNameMap.size() == TICKER_ENUM_MAX);
|
|
||||||
if (statistics) {
|
|
||||||
statistics->setTickerCount(ticker, count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -5,7 +5,7 @@
|
|||||||
//
|
//
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
#include "util/statistics_imp.h"
|
#include "util/statistics.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
// Auto-scoped.
|
// Auto-scoped.
|
||||||
@ -28,11 +28,7 @@ class StopWatch {
|
|||||||
return env_->NowMicros() - start_time_;
|
return env_->NowMicros() - start_time_;
|
||||||
}
|
}
|
||||||
|
|
||||||
~StopWatch() {
|
~StopWatch() { MeasureTime(statistics_, histogram_name_, ElapsedMicros()); }
|
||||||
if (statistics_) {
|
|
||||||
statistics_->measureTime(histogram_name_, ElapsedMicros());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Env* const env_;
|
Env* const env_;
|
||||||
|
@ -88,7 +88,7 @@ class VectorRep : public MemTableRep {
|
|||||||
using MemTableRep::GetIterator;
|
using MemTableRep::GetIterator;
|
||||||
|
|
||||||
// Return an iterator over the keys in this representation.
|
// Return an iterator over the keys in this representation.
|
||||||
virtual std::shared_ptr<MemTableRep::Iterator> GetIterator() override;
|
virtual MemTableRep::Iterator* GetIterator() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class Iterator;
|
friend class Iterator;
|
||||||
@ -228,22 +228,22 @@ void VectorRep::Iterator::SeekToLast() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<MemTableRep::Iterator> VectorRep::GetIterator() {
|
MemTableRep::Iterator* VectorRep::GetIterator() {
|
||||||
ReadLock l(&rwlock_);
|
ReadLock l(&rwlock_);
|
||||||
// Do not sort here. The sorting would be done the first time
|
// Do not sort here. The sorting would be done the first time
|
||||||
// a Seek is performed on the iterator.
|
// a Seek is performed on the iterator.
|
||||||
if (immutable_) {
|
if (immutable_) {
|
||||||
return std::make_shared<Iterator>(this, bucket_, compare_);
|
return new Iterator(this, bucket_, compare_);
|
||||||
} else {
|
} else {
|
||||||
std::shared_ptr<Bucket> tmp;
|
std::shared_ptr<Bucket> tmp;
|
||||||
tmp.reset(new Bucket(*bucket_)); // make a copy
|
tmp.reset(new Bucket(*bucket_)); // make a copy
|
||||||
return std::make_shared<Iterator>(nullptr, tmp, compare_);
|
return new Iterator(nullptr, tmp, compare_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // anon namespace
|
} // anon namespace
|
||||||
|
|
||||||
std::shared_ptr<MemTableRep> VectorRepFactory::CreateMemTableRep(
|
MemTableRep* VectorRepFactory::CreateMemTableRep(
|
||||||
MemTableRep::KeyComparator& compare, Arena* arena) {
|
MemTableRep::KeyComparator& compare, Arena* arena) {
|
||||||
return std::make_shared<VectorRep>(compare, arena, count_);
|
return new VectorRep(compare, arena, count_);
|
||||||
}
|
}
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
Loading…
x
Reference in New Issue
Block a user