Compare commits

..

8 Commits

Author SHA1 Message Date
sdong
5997edef44 Add 6.24.2 2021-09-17 14:16:18 -07:00
anand76
1ab38a19df More robust checking of IO uring completion data (#8894)
Summary:
Potential bugs in the IO uring implementation can cause bad data to be returned in the completion queue. Add checks in the PosixRandomAccessFile::MultiRead completion handling code to catch such errors and fail the entire MultiRead. Also log diagnostic messages and a stack trace.
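
A minimal sketch of the kind of completion validation described (hypothetical names and shapes, not the actual RocksDB code): each completion is matched back to its originating request via user_data, and any implausible result fails the whole batch rather than being trusted.

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-ins for an io_uring completion and its originating
// read request; user_data carries the request index.
struct ReadRequest { uint64_t offset; size_t len; };
struct Completion  { int64_t res; uint64_t user_data; };

// Returns true iff every completion is plausible; on any anomaly the caller
// should fail the entire MultiRead instead of returning partial results.
bool ValidateCompletions(const std::vector<ReadRequest>& reqs,
                         const std::vector<Completion>& cqes) {
  std::vector<bool> seen(reqs.size(), false);
  for (const Completion& c : cqes) {
    if (c.user_data >= reqs.size() || seen[c.user_data]) {
      std::fprintf(stderr, "bogus or duplicate user_data %llu\n",
                   static_cast<unsigned long long>(c.user_data));
      return false;
    }
    seen[c.user_data] = true;
    const ReadRequest& r = reqs[c.user_data];
    // res < 0 is an errno from the kernel; res > len would mean more bytes
    // were returned than requested, which indicates corrupt completion data.
    if (c.res > static_cast<int64_t>(r.len)) {
      std::fprintf(stderr, "offset %llu: got %lld bytes, asked for %zu\n",
                   static_cast<unsigned long long>(r.offset),
                   static_cast<long long>(c.res), r.len);
      return false;
    }
  }
  return true;
}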

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8894

Reviewed By: siying, pdillinger

Differential Revision: D30826982

Pulled By: anand1976

fbshipit-source-id: af91815ac760e095d6cc0466cf8bd5c10167fd15
2021-09-16 18:16:06 -07:00
anand76
9b2b87ec15 Update patch version to 6.24.1 2021-09-02 13:28:09 -07:00
anand76
1fe5ac3468 Fix a race in LRUCacheShard::Promote (#8717)
Summary:
In ```LRUCacheShard::Promote```, a reference is released outside the LRU mutex. Fix the race condition.
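
A schematic of the race and the fix, with simplified types (assumed shapes, not RocksDB's actual classes): releasing the reference while the shard mutex is still held removes the window in which another thread could observe or free the handle mid-update.

#include <mutex>

struct Handle { int refs = 1; };

class LRUCacheShardSketch {
 public:
  void Promote(Handle* h) {
    std::lock_guard<std::mutex> guard(mutex_);
    // ... re-insert h into the LRU list ...
    Unref(h);  // the fix: drop the reference under mutex_, not after unlocking
  }

 private:
  void Unref(Handle* h) {
    if (--h->refs == 0) delete h;  // safe: no concurrent observer mid-update
  }
  std::mutex mutex_;
};

int main() {
  LRUCacheShardSketch shard;
  shard.Promote(new Handle());  // handle is freed inside Promote via Unref
  return 0;
}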

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8717

Reviewed By: zhichao-cao

Differential Revision: D30649206

Pulled By: anand1976

fbshipit-source-id: 09c0af05b2294a7fe2c02876a61b0bad6e3ada61
2021-08-31 18:02:15 -07:00
mrambacher
3efc6fd641 Make Configurable/Customizable options copyable (#8704)
Summary:
The atomic variable "is_prepared_" was keeping Configurable objects from being copy-constructed.  Removed the atomic to allow copies.

Since the variable is only changed from false to true (and never back), there is no reason it had to be atomic.

Added tests that simple Configurable and Customizable objects can be put on the stack and copied.
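
To illustrate why the atomic blocked copying (toy types, not the real Configurable): std::atomic has a deleted copy constructor, which in turn deletes the implicit copy constructor of any class containing one.

#include <atomic>

struct WithAtomic {
  std::atomic<bool> is_prepared_{false};
};
// WithAtomic a, b(a);  // would not compile: copy constructor is deleted

// Because the flag only ever transitions false -> true before concurrent
// use, a plain bool preserves correctness and restores copyability.
struct WithPlainBool {
  bool is_prepared_ = false;
};

int main() {
  WithPlainBool a;
  WithPlainBool b(a);  // OK now
  (void)b;
  return 0;
}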

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8704

Reviewed By: anand1976

Differential Revision: D30530526

Pulled By: ltamasi

fbshipit-source-id: 4dd4439b3e5ad7fa396573d0b25d9fb709160576
2021-08-25 17:51:50 -07:00
Peter Dillinger
3a3f4801d0 Allow intentionally swallowed errors in BlockBasedFilterBlockReader (#8695)
Summary:
To avoid "Didn't get expected error from Get" failures from the crash test once block-based filter is enabled there (https://github.com/facebook/rocksdb/issues/8679). Basically, this applies the IGNORE_STATUS_IF_ERROR pattern from full_filter_block.cc to block_based_filter_block.cc.
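
A self-contained sketch of the IGNORE_STATUS_IF_ERROR pattern (the toy Status class below only mimics RocksDB's ASSERT_STATUS_CHECKED behavior; it is an assumption, not the real implementation): a filter-read error is deliberately swallowed by marking the status checked, and the filter falls back to the always-safe "may match" answer.

#include <cassert>

// Toy Status: error statuses must be inspected or explicitly permitted
// before destruction, mimicking ASSERT_STATUS_CHECKED builds (simplified).
class Status {
 public:
  static Status OK() { return Status(false); }
  static Status IOError() { return Status(true); }
  bool ok() const { checked_ = true; return !is_error_; }
  void PermitUncheckedError() const { checked_ = true; }
  ~Status() { assert(!is_error_ || checked_); }
 private:
  explicit Status(bool e) : is_error_(e) {}
  bool is_error_;
  mutable bool checked_ = false;
};

#define IGNORE_STATUS_IF_ERROR(s) (s).PermitUncheckedError()

bool KeyMayMatch() {
  Status s = Status::IOError();  // the filter block read failed
  IGNORE_STATUS_IF_ERROR(s);     // deliberately swallow the error and
  return true;                   // fall back to "may match" (always safe)
}

int main() { return KeyMayMatch() ? 0 : 1; }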

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8695

Test Plan: watch for resolution of crash test runs

Reviewed By: ltamasi

Differential Revision: D30496748

Pulled By: pdillinger

fbshipit-source-id: f7808fcf14c0e787fe81da03fa8303244590d273
2021-08-23 16:12:50 -07:00
Peter Dillinger
87b20cbba0 Fix typo in 6.24.0 HISTORY.md (#8694)
Summary:
fix typo

Also, clarified change of C API signatures.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8694

Test Plan: visual

Reviewed By: ltamasi

Differential Revision: D30492882

Pulled By: pdillinger

fbshipit-source-id: ac6dc3dcefa01c91fd87fc7f50279ea5e13fa41d
2021-08-23 13:46:56 -07:00
mrambacher
c2c92f3013 Fix LITE build (#8689)
Summary:
Conditional compilation of static functions not used in LITE mode.
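
The fix pattern, sketched under assumed names (not the actual functions involved): static helpers called only from non-LITE code paths are guarded by ROCKSDB_LITE so LITE builds don't trip unused-function warnings-as-errors.

#ifndef ROCKSDB_LITE
// Only referenced from non-LITE code; compiling it unconditionally would
// trigger -Wunused-function (an error under -Werror) in LITE builds.
static int HelperOnlyUsedInFullBuilds() { return 42; }
#endif  // !ROCKSDB_LITE

int main() {
#ifndef ROCKSDB_LITE
  return HelperOnlyUsedInFullBuilds() == 42 ? 0 : 1;
#else
  return 0;
#endif
}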

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8689

Reviewed By: ltamasi

Differential Revision: D30476218

Pulled By: mrambacher

fbshipit-source-id: 5f3af90982d34818f47d2cb1d36dd5816d0333a5
2021-08-23 09:49:34 -07:00
1038 changed files with 39164 additions and 88425 deletions

.circleci/cat_ignore_eagain (new executable file, 54 lines)

@ -0,0 +1,54 @@
#! /bin/bash
# Work around issue with parallel make output causing random error, as in
# make[1]: write error: stdout
# Probably due to a kernel bug:
# https://bugs.launchpad.net/ubuntu/+source/linux-signed/+bug/1814393
# Seems to affect image ubuntu-1604:201903-01 and ubuntu-1604:202004-01
cd "$(dirname $0)"
if [ ! -x cat_ignore_eagain.out ]; then
cc -x c -o cat_ignore_eagain.out - << EOF
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
int main() {
int n, m, p;
char buf[1024];
for (;;) {
n = read(STDIN_FILENO, buf, 1024);
if (n > 0 && n <= 1024) {
for (m = 0; m < n;) {
p = write(STDOUT_FILENO, buf + m, n - m);
if (p < 0) {
if (errno == EAGAIN) {
// ignore but pause a bit
usleep(100);
} else {
perror("write failed");
return 42;
}
} else {
m += p;
}
}
} else if (n < 0) {
if (errno == EAGAIN) {
// ignore but pause a bit
usleep(100);
} else {
// Some non-ignorable error
perror("read failed");
return 43;
}
} else {
// EOF
return 0;
}
}
}
EOF
fi
exec ./cat_ignore_eagain.out
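
Throughout the CircleCI config below, this script is used as a pipe target so an EAGAIN on stdout cannot kill a test run, e.g. make V=1 J=32 -j32 check | .circleci/cat_ignore_eagain.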

.circleci/config.yml

@ -2,13 +2,21 @@ version: 2.1
orbs:
win: circleci/windows@2.4.0
slack: circleci/slack@3.4.2
aliases:
- &notify-on-main-failure
- &notify-on-master-failure
fail_only: true
only_for_branches: main
only_for_branches: master
commands:
install-pyenv-on-macos:
steps:
- run:
name: Install pyenv on macos
command: |
HOMEBREW_NO_AUTO_UPDATE=1 brew install pyenv
install-cmake-on-macos:
steps:
- run:
@ -16,13 +24,6 @@ commands:
command: |
HOMEBREW_NO_AUTO_UPDATE=1 brew install cmake
install-jdk8-on-macos:
steps:
- run:
name: Install JDK 8 on macos
command: |
brew install --cask adoptopenjdk/openjdk/adoptopenjdk8
increase-max-open-files-on-macos:
steps:
- run:
@ -33,29 +34,33 @@ commands:
sudo launchctl limit maxfiles 1048576
pre-steps:
parameters:
python-version:
default: "3.5.9"
type: string
steps:
- checkout
- run: pyenv install --skip-existing <<parameters.python-version>>
- run: pyenv global <<parameters.python-version>>
- run:
name: Setup Environment Variables
command: |
echo "export GTEST_THROW_ON_FAILURE=0" >> $BASH_ENV
echo "export GTEST_OUTPUT=\"xml:/tmp/test-results/\"" >> $BASH_ENV
echo "export SKIP_FORMAT_BUCK_CHECKS=1" >> $BASH_ENV
echo "export PRINT_PARALLEL_OUTPUTS=1" >> $BASH_ENV
echo "export GTEST_COLOR=1" >> $BASH_ENV
echo "export CTEST_OUTPUT_ON_FAILURE=1" >> $BASH_ENV
echo "export CTEST_TEST_TIMEOUT=300" >> $BASH_ENV
echo "export ZLIB_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zlib" >> $BASH_ENV
echo "export BZIP2_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/bzip2" >> $BASH_ENV
echo "export SNAPPY_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/snappy" >> $BASH_ENV
echo "export LZ4_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/lz4" >> $BASH_ENV
echo "export ZSTD_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zstd" >> $BASH_ENV
pre-steps-macos:
steps:
- pre-steps
- pre-steps:
python-version: "3.6.0"
post-steps:
steps:
- slack/status: *notify-on-master-failure
- store_test_results: # store test result if there's any
path: /tmp/test-results
- store_artifacts: # store LOG for debugging if there's any
@ -74,23 +79,11 @@ commands:
- run:
name: Install Clang 10
command: |
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-10 main" | sudo tee -a /etc/apt/sources.list
echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-10 main" | sudo tee -a /etc/apt/sources.list
echo "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-10 main" | sudo tee -a /etc/apt/sources.list
echo "deb-src http://apt.llvm.org/xenial/ llvm-toolchain-xenial-10 main" | sudo tee -a /etc/apt/sources.list
echo "APT::Acquire::Retries \"10\";" | sudo tee -a /etc/apt/apt.conf.d/80-retries # llvm.org unreliable
sudo apt-get update -y && sudo apt-get install -y clang-10
install-clang-13:
steps:
- run:
name: Install Clang 13
command: |
echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee -a /etc/apt/sources.list
echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee -a /etc/apt/sources.list
echo "APT::Acquire::Retries \"10\";" | sudo tee -a /etc/apt/apt.conf.d/80-retries # llvm.org unreliable
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
sudo apt-get update -y && sudo apt-get install -y clang-13
install-gflags:
steps:
- run:
@ -100,22 +93,10 @@ commands:
install-benchmark:
steps:
- run:
name: Install ninja build
command: sudo apt-get update -y && sudo apt-get install -y ninja-build
- run:
- run: # currently doesn't support ubuntu-1604 which doesn't have libbenchmark package, user can still install by building it youself
name: Install benchmark
command: |
git clone --depth 1 --branch v1.6.1 https://github.com/google/benchmark.git ~/benchmark
cd ~/benchmark && mkdir build && cd build
cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0
ninja && sudo ninja install
install-valgrind:
steps:
- run:
name: Install valgrind
command: sudo apt-get update -y && sudo apt-get install -y valgrind
sudo apt-get update -y && sudo apt-get install -y libbenchmark-dev
upgrade-cmake:
steps:
@ -147,21 +128,6 @@ commands:
command: |
sudo apt-get update -y && sudo apt-get install -y libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev
install-libprotobuf-mutator:
steps:
- run:
name: Install libprotobuf-mutator libs
command: |
git clone --single-branch --branch master --depth 1 git@github.com:google/libprotobuf-mutator.git ~/libprotobuf-mutator
cd ~/libprotobuf-mutator && mkdir build && cd build
cmake .. -GNinja -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_BUILD_TYPE=Release -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON
ninja && sudo ninja install
- run:
name: Setup environment variables
command: |
echo "export PKG_CONFIG_PATH=/usr/local/OFF/:~/libprotobuf-mutator/build/external.protobuf/lib/pkgconfig/" >> $BASH_ENV
echo "export PROTOC_BIN=~/libprotobuf-mutator/build/external.protobuf/bin/protoc" >> $BASH_ENV
executors:
windows-2xlarge:
machine:
@ -172,341 +138,255 @@ executors:
jobs:
build-macos:
macos:
xcode: 12.5.1
xcode: 11.3.0
resource_class: large
environment:
ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc cause env_test hang, disable it for now
steps:
- increase-max-open-files-on-macos
- install-pyenv-on-macos
- install-gflags-on-macos
- pre-steps-macos
- run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=32 -j32 all
- run: ulimit -S -n 1048576 && OPT=-DCIRCLECI make V=1 J=32 -j32 check | .circleci/cat_ignore_eagain
- post-steps
build-macos-cmake:
macos:
xcode: 12.5.1
xcode: 11.3.0
resource_class: large
parameters:
run_even_tests:
description: run even or odd tests, used to split tests to 2 groups
type: boolean
default: true
steps:
- increase-max-open-files-on-macos
- install-pyenv-on-macos
- install-cmake-on-macos
- install-gflags-on-macos
- pre-steps-macos
- run:
name: "cmake generate project file"
command: ulimit -S -n `ulimit -H -n` && mkdir build && cd build && cmake -DWITH_GFLAGS=1 ..
- run:
name: "Build tests"
command: cd build && make V=1 -j32
- when:
condition: << parameters.run_even_tests >>
steps:
- run:
name: "Run even tests"
command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 0,,2
- when:
condition:
not: << parameters.run_even_tests >>
steps:
- run:
name: "Run odd tests"
command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 1,,2
- run: ulimit -S -n 1048576 && (mkdir build && cd build && cmake -DWITH_GFLAGS=1 .. && make V=1 -j32 && ctest -j10) | .circleci/cat_ignore_eagain
- post-steps
build-linux:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: make V=1 J=32 -j32 check
- run: make V=1 J=32 -j32 check | .circleci/cat_ignore_eagain
- post-steps
build-linux-encrypted_env-no_compression:
build-linux-mem-env:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check
- run: |
./sst_dump --help | egrep -q 'Supported compression types: kNoCompression$' # Verify no compiled in compression
- run: MEM_ENV=1 make V=1 J=32 -j32 check | .circleci/cat_ignore_eagain
- post-steps
build-linux-encrypted-env:
machine:
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: ENCRYPTED_ENV=1 make V=1 J=32 -j32 check | .circleci/cat_ignore_eagain
- post-steps
build-linux-shared_lib-alt_namespace-status_checked:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j32 check
- run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j32 check | .circleci/cat_ignore_eagain
- post-steps
build-linux-release:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
image: ubuntu-1604:202104-01
resource_class: large
steps:
- checkout # check out the code in the project directory
- run: make V=1 -j32 release
- run: make V=1 -j8 release | .circleci/cat_ignore_eagain
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- install-gflags
- run: make V=1 -j32 release
- run: make V=1 -j8 release | .circleci/cat_ignore_eagain
- run: ./db_stress --version # ensure with gflags
- post-steps
build-linux-release-rtti:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
image: ubuntu-1604:201903-01
resource_class: large
steps:
- checkout # check out the code in the project directory
- run: make clean
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j8 static_lib tools db_bench | .circleci/cat_ignore_eagain
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- run: sudo apt-get update -y && sudo apt-get install -y libgflags-dev
- run: make clean
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j8 static_lib tools db_bench | .circleci/cat_ignore_eagain
- run: ./db_stress --version # ensure with gflags
build-linux-lite:
machine:
image: ubuntu-2004:202111-02
resource_class: large
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: LITE=1 make V=1 J=8 -j8 check
- run: LITE=1 make V=1 J=32 -j32 check | .circleci/cat_ignore_eagain
- post-steps
build-linux-lite-release:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: large
steps:
- checkout # check out the code in the project directory
- run: LITE=1 make V=1 -j8 release
- run: LITE=1 make V=1 -j8 release | .circleci/cat_ignore_eagain
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- install-gflags
- run: LITE=1 make V=1 -j8 release
- run: LITE=1 make V=1 -j8 release | .circleci/cat_ignore_eagain
- run: ./db_stress --version # ensure with gflags
- post-steps
build-linux-clang-no_test_run:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: xlarge
steps:
- checkout # check out the code in the project directory
- run: sudo apt-get update -y && sudo apt-get install -y clang libgflags-dev libtbb-dev
- run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 make V=1 -j16 all
- run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 make V=1 -j16 all | .circleci/cat_ignore_eagain
- post-steps
build-linux-clang10-asan:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-clang-10
- run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out
- run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check | .circleci/cat_ignore_eagain # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-clang10-mini-tsan:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
# find test list by `make list_all_tests`
parameters:
start_test:
default: ""
type: string
end_test:
default: ""
type: string
steps:
- pre-steps
- install-gflags
- install-clang-10
- install-gtest-parallel
- run:
name: "Build unit tests"
command: |
echo "env: $(env)"
ROCKSDBTESTS_START=<<parameters.start_test>> ROCKSDBTESTS_END=<<parameters.end_test>> ROCKSDBTESTS_SUBSET_TESTS_TO_FILE=/tmp/test_list COMPILE_WITH_TSAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 --output-sync=target build_subset_tests
- run:
name: "Run unit tests in parallel"
command: |
sed -i 's/[[:space:]]*$//; s/ / \.\//g; s/.*/.\/&/' /tmp/test_list
cat /tmp/test_list
gtest-parallel $(</tmp/test_list) --output_dir=/tmp | cat # pipe to cat to continuously output status on circleci UI. Otherwise, no status will be printed while the job is running.
- run: COMPILE_WITH_TSAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check | .circleci/cat_ignore_eagain # aligned new doesn't work for reason we haven't figured out.
- post-steps
build-linux-clang10-ubsan:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-clang-10
- run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-valgrind:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-valgrind
- run: PORTABLE=1 make V=1 -j32 valgrind_test
- run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check | .circleci/cat_ignore_eagain # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-clang10-clang-analyze:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-clang-10
- run: sudo apt-get update -y && sudo apt-get install -y clang-tools-10
- run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path.
- run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze | .circleci/cat_ignore_eagain # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path.
- post-steps
build-linux-cmake-with-folly:
build-linux-cmake:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- upgrade-cmake
- run: make checkout_folly
- run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 .. && make V=1 -j20 && ctest -j20)
- run: (mkdir build && cd build && cmake -DWITH_GFLAGS=1 .. && make V=1 -j20 && ctest -j20) | .circleci/cat_ignore_eagain
- post-steps
build-linux-cmake-with-benchmark:
build-linux-cmake-ubuntu-20:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-2004:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-benchmark
- run: (mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. && make V=1 -j20 && ctest -j20)
- run: (mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. && make V=1 -j20 && ctest -j20 && make microbench) | .circleci/cat_ignore_eagain
- post-steps
build-linux-unity-and-headers:
build-linux-unity:
docker: # executor type
- image: gcc:latest
environment:
EXTRA_CXXFLAGS: -mno-avx512f # Warnings-as-error in avx512fintrin.h, would be used on newer hardware
resource_class: large
steps:
- checkout # check out the code in the project directory
- run: apt-get update -y && apt-get install -y libgflags-dev
- run: make V=1 -j8 unity_test
- run: make V=1 -j8 -k check-headers # could be moved to a different build
- run: TEST_TMPDIR=/dev/shm && make V=1 -j8 unity_test | .circleci/cat_ignore_eagain
- post-steps
build-linux-gcc-7-with-folly:
build-linux-gcc-4_8-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
image: ubuntu-1604:202104-01
resource_class: large
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-7 g++-7 libgflags-dev
- run: make checkout_folly
- run: USE_FOLLY=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 check
- run: sudo apt-get update -y && sudo apt-get install gcc-4.8 g++-4.8 libgflags-dev
- run: CC=gcc-4.8 CXX=g++-4.8 V=1 SKIP_LINK=1 make -j8 all | .circleci/cat_ignore_eagain # Linking broken because libgflags compiled with newer ABI
- post-steps
build-linux-gcc-8-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
image: ubuntu-2004:202010-01
resource_class: large
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-8 g++-8 libgflags-dev
- run: CC=gcc-8 CXX=g++-8 V=1 make -j16 all
- run: sudo apt-get update -y && sudo apt-get install gcc-8 g++-8 libgflags-dev
- run: CC=gcc-8 CXX=g++-8 V=1 SKIP_LINK=1 make -j8 all | .circleci/cat_ignore_eagain # Linking broken because libgflags compiled with newer ABI
- post-steps
build-linux-gcc-9-no_test_run:
machine:
image: ubuntu-2004:202010-01
resource_class: large
steps:
- pre-steps
- run: sudo apt-get update -y && sudo apt-get install gcc-9 g++-9 libgflags-dev
- run: CC=gcc-9 CXX=g++-9 V=1 SKIP_LINK=1 make -j8 all | .circleci/cat_ignore_eagain # Linking broken because libgflags compiled with newer ABI
- post-steps
build-linux-gcc-10-cxx20-no_test_run:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-2004:202010-01
resource_class: xlarge
steps:
- pre-steps
- run: sudo apt-get update -y && sudo apt-get install gcc-10 g++-10 libgflags-dev
- run: CC=gcc-10 CXX=g++-10 V=1 ROCKSDB_CXX_STANDARD=c++20 make -j16 all
- post-steps
build-linux-gcc-11-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-11 g++-11 libgflags-dev
- install-benchmark
- run: CC=gcc-11 CXX=g++-11 V=1 make -j16 all microbench
- post-steps
build-linux-clang-13-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- install-clang-13
- install-benchmark
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j16 all microbench
- post-steps
# Ensure ASAN+UBSAN with folly, and full testsuite with clang 13
build-linux-clang-13-asan-ubsan-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-clang-13
- install-gflags
- run: make checkout_folly
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check
- run: CC=gcc-10 CXX=g++-10 V=1 SKIP_LINK=1 ROCKSDB_CXX_STANDARD=c++20 make -j16 all | .circleci/cat_ignore_eagain # Linking broken because libgflags compiled with newer ABI
- post-steps
# This job is only to make sure the microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing.
build-linux-run-microbench:
build-linux-microbench:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
image: ubuntu-2004:202010-01
resource_class: xlarge
steps:
- pre-steps
- install-benchmark
- run: DEBUG_LEVEL=0 make -j32 run_microbench
- post-steps
build-linux-mini-crashtest:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-gflags
- install-compression-libs
- run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS=--duration=960 blackbox_crash_test_with_atomic_flush
- run: DEBUG_LEVEL=0 make microbench | .circleci/cat_ignore_eagain
- post-steps
build-windows:
@ -579,7 +459,7 @@ jobs:
build-linux-java:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: large
environment:
JAVA_HOME: /usr/lib/jvm/java-1.8.0-openjdk-amd64
@ -593,14 +473,17 @@ jobs:
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava Shared Library"
command: make V=1 J=8 -j8 rocksdbjava | .circleci/cat_ignore_eagain
- run:
name: "Test RocksDBJava"
command: make V=1 J=8 -j8 jtest
command: make V=1 J=8 -j8 jtest | .circleci/cat_ignore_eagain
- post-steps
build-linux-java-static:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: large
environment:
JAVA_HOME: /usr/lib/jvm/java-1.8.0-openjdk-amd64
@ -616,20 +499,19 @@ jobs:
which javac && javac -version
- run:
name: "Build RocksDBJava Static Library"
command: make V=1 J=8 -j8 rocksdbjavastatic
command: make V=1 J=8 -j8 rocksdbjavastatic | .circleci/cat_ignore_eagain
- post-steps
build-macos-java:
macos:
xcode: 12.5.1
resource_class: large
xcode: 11.3.0
resource_class: medium
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes java 8 crash
steps:
- increase-max-open-files-on-macos
- install-pyenv-on-macos
- install-gflags-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
@ -638,22 +520,25 @@ jobs:
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava Shared Library"
command: make V=1 J=8 -j8 rocksdbjava | .circleci/cat_ignore_eagain
- run:
name: "Test RocksDBJava"
command: make V=1 J=16 -j16 jtest
command: make V=1 J=8 -j8 jtest | .circleci/cat_ignore_eagain
- post-steps
build-macos-java-static:
macos:
xcode: 12.5.1
resource_class: large
xcode: 11.3.0
resource_class: medium
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
steps:
- increase-max-open-files-on-macos
- install-pyenv-on-macos
- install-gflags-on-macos
- install-cmake-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
@ -663,37 +548,13 @@ jobs:
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava x86 and ARM Static Libraries"
command: make V=1 J=16 -j16 rocksdbjavastaticosx
- post-steps
build-macos-java-static-universal:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-cmake-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava Universal Binary Static Library"
command: make V=1 J=16 -j16 rocksdbjavastaticosx_ub
name: "Build RocksDBJava Static Library"
command: make V=1 J=8 -j8 rocksdbjavastatic | .circleci/cat_ignore_eagain
- post-steps
build-examples:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: large
steps:
- pre-steps
@ -701,13 +562,13 @@ jobs:
- run:
name: "Build examples"
command: |
OPT=-DTRAVIS V=1 make -j4 static_lib && cd examples && make -j4
OPT=-DTRAVIS V=1 make -j4 static_lib && cd examples && make -j4 | ../.circleci/cat_ignore_eagain
- post-steps
build-cmake-mingw:
machine:
image: ubuntu-2004:202111-02
resource_class: large
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
@ -729,39 +590,57 @@ jobs:
build-linux-non-shm:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
environment:
TEST_TMPDIR: /tmp/rocksdb_test_tmp
parameters:
start_test:
default: ""
type: string
end_test:
default: ""
type: string
steps:
- pre-steps
- install-gflags
- run: make V=1 -j32 check
- install-gtest-parallel
- run:
name: "Build unit tests"
command: |
echo "env: $(env)"
echo "** done env"
ROCKSDBTESTS_START=<<parameters.start_test>> ROCKSDBTESTS_END=<<parameters.end_test>> ROCKSDBTESTS_SUBSET_TESTS_TO_FILE=/tmp/test_list make V=1 -j32 --output-sync=target build_subset_tests
- run:
name: "Run unit tests in parallel"
command: |
sed -i 's/[[:space:]]*$//; s/ / \.\//g; s/.*/.\/&/' /tmp/test_list
cat /tmp/test_list
export TEST_TMPDIR=/tmp/rocksdb_test_tmp
gtest-parallel $(</tmp/test_list) --output_dir=/tmp | cat # pipe to cat to continuously output status on circleci UI. Otherwise, no status will be printed while the job is running.
- post-steps
build-linux-arm-test-full:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-2004:202101-01
resource_class: arm.large
steps:
- pre-steps
- install-gflags
- run: make V=1 J=4 -j4 check
- run: make V=1 J=4 -j4 check | .circleci/cat_ignore_eagain
- post-steps
build-linux-arm:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-2004:202101-01
resource_class: arm.large
steps:
- pre-steps
- install-gflags
- run: ROCKSDBTESTS_PLATFORM_DEPENDENT=only make V=1 J=4 -j4 all_but_some_tests check_some
- run: ROCKSDBTESTS_PLATFORM_DEPENDENT=only make V=1 J=4 -j4 all_but_some_tests check_some | .circleci/cat_ignore_eagain
- post-steps
build-linux-arm-cmake-no_test_run:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-2004:202101-01
resource_class: arm.large
environment:
JAVA_HOME: /usr/lib/jvm/java-8-openjdk-arm64
@ -794,7 +673,7 @@ jobs:
build-format-compatible:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:202104-01
resource_class: 2xlarge
steps:
- pre-steps
@ -809,103 +688,120 @@ jobs:
tools/check_format_compatible.sh
- post-steps
build-fuzzers:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-clang-13
- run: sudo apt-get update -y && sudo apt-get install -y cmake ninja-build binutils liblzma-dev libz-dev pkg-config autoconf libtool
- install-libprotobuf-mutator
- run:
name: "Build rocksdb lib"
command: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j4 static_lib
- run:
name: "Build fuzzers"
command: cd fuzz && make sst_file_writer_fuzzer db_fuzzer db_map_fuzzer
- post-steps
workflows:
version: 2
jobs-linux-run-tests:
build-linux:
jobs:
- build-linux
- build-linux-cmake-with-folly
- build-linux-gcc-7-with-folly
- build-linux-cmake-with-benchmark
- build-linux-encrypted_env-no_compression
- build-linux-lite
jobs-linux-run-tests-san:
build-linux-cmake:
jobs:
- build-linux-cmake
- build-linux-cmake-ubuntu-20
build-linux-mem-env:
jobs:
- build-linux-mem-env
build-linux-encrypted-env:
jobs:
- build-linux-encrypted-env
build-linux-shared_lib-alt_namespace-status_checked:
jobs:
- build-linux-clang10-asan
- build-linux-clang10-ubsan
- build-linux-clang10-mini-tsan:
start_test: ""
end_test: "env_test"
- build-linux-clang10-mini-tsan:
start_test: "env_test"
end_test: ""
- build-linux-shared_lib-alt_namespace-status_checked
jobs-linux-no-test-run:
build-linux-lite:
jobs:
- build-linux-lite
build-linux-release:
jobs:
- build-linux-release
build-linux-release-rtti:
jobs:
- build-linux-release-rtti
build-linux-lite-release:
jobs:
- build-linux-lite-release
- build-examples
- build-fuzzers
- build-linux-clang-no_test_run
- build-linux-clang-13-no_test_run
- build-linux-gcc-8-no_test_run
- build-linux-gcc-10-cxx20-no_test_run
- build-linux-gcc-11-no_test_run
- build-linux-arm-cmake-no_test_run
jobs-linux-other-checks:
build-linux-clang10-asan:
jobs:
- build-linux-clang10-asan
build-linux-clang10-mini-tsan:
jobs:
- build-linux-clang10-mini-tsan
build-linux-clang10-ubsan:
jobs:
- build-linux-clang10-ubsan
build-linux-clang10-clang-analyze:
jobs:
- build-linux-clang10-clang-analyze
- build-linux-unity-and-headers
- build-linux-mini-crashtest
jobs-windows:
build-linux-unity:
jobs:
- build-linux-unity
build-windows-vs2019:
jobs:
- build-windows:
name: "build-windows-vs2019"
build-windows-vs2019-cxx20:
jobs:
- build-windows:
name: "build-windows-vs2019-cxx20"
extra_cmake_opt: -DCMAKE_CXX_STANDARD=20
build-windows-vs2017:
jobs:
- build-windows:
name: "build-windows-vs2017"
vs_year: "2017"
cmake_generator: "Visual Studio 15 Win64"
- build-cmake-mingw
jobs-java:
build-java:
jobs:
- build-linux-java
- build-linux-java-static
- build-macos-java
- build-macos-java-static
- build-macos-java-static-universal
jobs-macos:
build-examples:
jobs:
- build-examples
build-linux-non-shm:
jobs:
- build-linux-non-shm:
start_test: ""
end_test: "db_options_test" # make sure unique in src.mk
- build-linux-non-shm:
start_test: "db_options_test" # make sure unique in src.mk
end_test: "filename_test" # make sure unique in src.mk
- build-linux-non-shm:
start_test: "filename_test" # make sure unique in src.mk
end_test: "statistics_test" # make sure unique in src.mk
- build-linux-non-shm:
start_test: "statistics_test" # make sure unique in src.mk
end_test: ""
build-linux-compilers-no_test_run:
jobs:
- build-linux-clang-no_test_run
- build-linux-gcc-4_8-no_test_run
- build-linux-gcc-8-no_test_run
- build-linux-gcc-9-no_test_run
- build-linux-gcc-10-cxx20-no_test_run
- build-linux-arm-cmake-no_test_run
build-macos:
jobs:
- build-macos
- build-macos-cmake:
run_even_tests: true
- build-macos-cmake:
run_even_tests: false
jobs-linux-arm:
build-macos-cmake:
jobs:
- build-macos-cmake
build-cmake-mingw:
jobs:
- build-cmake-mingw
build-linux-arm:
jobs:
- build-linux-arm
build-microbench:
jobs:
- build-linux-microbench
nightly:
triggers:
- schedule:
cron: "0 9 * * *"
cron: "0 0 * * *"
filters:
branches:
only:
- main
- master
jobs:
- build-format-compatible
- build-linux-arm-test-full
- build-linux-run-microbench
- build-linux-non-shm
- build-linux-clang-13-asan-ubsan-with-folly
- build-linux-valgrind

.github workflow (format and source checks)

@ -32,13 +32,10 @@ jobs:
- name: Download clang-format-diff.py
uses: wei/wget@v1
with:
args: https://raw.githubusercontent.com/llvm/llvm-project/release/12.x/clang/tools/clang-format/clang-format-diff.py
args: https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py
- name: Check format
run: VERBOSE_CHECK=1 make check-format
- name: Compare buckify output
run: make check-buck-targets
- name: Simple source code checks
run: make check-sources

.gitignore (1 line changed)

@ -95,4 +95,3 @@ fuzz/proto/gen/
fuzz/crash-*
cmake-build-*
third-party/folly/

.travis.yml

@ -5,7 +5,6 @@ os:
arch:
- arm64
- ppc64le
- s390x
compiler:
- clang
- gcc
@ -43,18 +42,23 @@ env:
- JOB_NAME=cmake-gcc9 # 3-5 minutes
- JOB_NAME=cmake-gcc9-c++20 # 3-5 minutes
- JOB_NAME=cmake-mingw # 3 minutes
- JOB_NAME=make-gcc4.8
- JOB_NAME=status_checked
matrix:
exclude:
- os : linux
arch: arm64
env: JOB_NAME=cmake-mingw
- os : linux
arch: arm64
env: JOB_NAME=make-gcc4.8
- os: linux
arch: ppc64le
env: JOB_NAME=cmake-mingw
- os: linux
arch: s390x
env: JOB_NAME=cmake-mingw
arch: ppc64le
env: JOB_NAME=make-gcc4.8
- os: linux
compiler: clang
- if: type = pull_request AND commit_message !~ /FULL_CI/
@ -69,10 +73,6 @@ matrix:
os: linux
arch: ppc64le
env: TEST_GROUP=1
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=1
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
@ -81,10 +81,6 @@ matrix:
os: linux
arch: ppc64le
env: TEST_GROUP=2
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=2
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
@ -93,10 +89,6 @@ matrix:
os: linux
arch: ppc64le
env: TEST_GROUP=3
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=3
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
@ -105,10 +97,6 @@ matrix:
os: linux
arch: ppc64le
env: TEST_GROUP=4
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=4
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
@ -121,10 +109,6 @@ matrix:
os: linux
arch: ppc64le
env: JOB_NAME=java_test
- if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/
os: linux
arch: s390x
env: JOB_NAME=java_test
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
@ -133,10 +117,6 @@ matrix:
os: linux
arch: ppc64le
env: JOB_NAME=lite_build
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=lite_build
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
@ -145,10 +125,6 @@ matrix:
os: linux
arch: ppc64le
env: JOB_NAME=examples
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=examples
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
@ -157,10 +133,6 @@ matrix:
os: linux
arch: ppc64le
env: JOB_NAME=cmake-gcc8
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=cmake-gcc8
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
@ -169,10 +141,6 @@ matrix:
os: linux
arch: ppc64le
env: JOB_NAME=cmake-gcc9
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=cmake-gcc9
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
@ -183,11 +151,14 @@ matrix:
env: JOB_NAME=cmake-gcc9-c++20
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: s390x
env: JOB_NAME=cmake-gcc9-c++20
arch: arm64
env: JOB_NAME=status_checked
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: JOB_NAME=status_checked
install:
- CC=gcc-7 && CXX=g++-7
- if [ "${JOB_NAME}" == cmake-gcc8 ]; then
sudo apt-get install -y g++-8 || exit $?;
CC=gcc-8 && CXX=g++-8;
@ -199,8 +170,9 @@ install:
- if [ "${JOB_NAME}" == cmake-mingw ]; then
sudo apt-get install -y mingw-w64 || exit $?;
fi
- if [ "${CXX}" == "g++-7" ]; then
sudo apt-get install -y g++-7 || exit $?;
- if [ "${JOB_NAME}" == make-gcc4.8 ]; then
sudo apt-get install -y g++-4.8 || exit $?;
CC=gcc-4.8 && CXX=g++-4.8;
fi
- |
if [[ "${JOB_NAME}" == cmake* ]]; then
@ -230,25 +202,21 @@ before_script:
script:
- date; ${CXX} --version
- if [ `command -v ccache` ]; then ccache -C; fi
- export MK_PARALLEL=4;
if [[ "$TRAVIS_CPU_ARCH" == s390x ]]; then
export MK_PARALLEL=1;
fi
- case $TEST_GROUP in
platform_dependent)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=only make -j$MK_PARALLEL all_but_some_tests check_some
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=only make -j4 all_but_some_tests check_some
;;
1)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_END=backup_engine_test make -j$MK_PARALLEL check_some
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_END=backupable_db_test make -j4 check_some
;;
2)
OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 make -j$MK_PARALLEL tools && OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=backup_engine_test ROCKSDBTESTS_END=db_universal_compaction_test make -j$MK_PARALLEL check_some
OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 make -j4 tools && OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=backupable_db_test ROCKSDBTESTS_END=db_universal_compaction_test make -j4 check_some
;;
3)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=db_universal_compaction_test ROCKSDBTESTS_END=table_properties_collector_test make -j$MK_PARALLEL check_some
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=db_universal_compaction_test ROCKSDBTESTS_END=table_properties_collector_test make -j4 check_some
;;
4)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=table_properties_collector_test make -j$MK_PARALLEL check_some
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=table_properties_collector_test make -j4 check_some
;;
esac
- case $JOB_NAME in
@ -256,10 +224,10 @@ script:
OPT=-DTRAVIS LIB_MODE=shared V=1 make rocksdbjava jtest
;;
lite_build)
OPT='-DTRAVIS -DROCKSDB_LITE' LIB_MODE=shared V=1 make -j$MK_PARALLEL all
OPT='-DTRAVIS -DROCKSDB_LITE' LIB_MODE=shared V=1 make -j4 all
;;
examples)
OPT=-DTRAVIS LIB_MODE=shared V=1 make -j$MK_PARALLEL static_lib && cd examples && make -j$MK_PARALLEL
OPT=-DTRAVIS LIB_MODE=shared V=1 make -j4 static_lib && cd examples && make -j4
;;
cmake-mingw)
sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix;
@ -272,7 +240,13 @@ script:
;;
esac
mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=0 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 .. && make -j$MK_PARALLEL && cd .. && rm -rf build && mkdir build && cd build && cmake -DJNI=1 .. -DCMAKE_BUILD_TYPE=Release $OPT && make -j$MK_PARALLEL rocksdb rocksdbjni
mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=0 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 .. && make -j4 && cd .. && rm -rf build && mkdir build && cd build && cmake -DJNI=1 .. -DCMAKE_BUILD_TYPE=Release $OPT && make -j4 rocksdb rocksdbjni
;;
make-gcc4.8)
OPT=-DTRAVIS LIB_MODE=shared V=1 SKIP_LINK=1 make -j4 all && [ "Linking broken because libgflags compiled with newer ABI" ]
;;
status_checked)
OPT=-DTRAVIS LIB_MODE=shared V=1 ASSERT_STATUS_CHECKED=1 make -j4 check_some
;;
esac
notifications:

CMakeLists.txt

@ -2,7 +2,7 @@
# This cmake build is for Windows 64-bit only.
#
# Prerequisites:
# You must have at least Visual Studio 2019. Start the Developer Command Prompt window that is a part of Visual Studio installation.
# You must have at least Visual Studio 2015 Update 3. Start the Developer Command Prompt window that is a part of Visual Studio installation.
# Run the build commands from within the Developer Command Prompt window to have paths to the compiler and runtime libraries set.
# You must have git.exe in your %PATH% environment variable.
#
@ -14,7 +14,7 @@
# cd build
# 3. Run cmake to generate project files for Windows, add more options to enable required third-party libraries.
# See thirdparty.inc for more information.
# sample command: cmake -G "Visual Studio 16 2019" -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=1 -DWITH_SNAPPY=1 -DWITH_JEMALLOC=1 -DWITH_JNI=1 ..
# sample command: cmake -G "Visual Studio 15 Win64" -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=1 -DWITH_SNAPPY=1 -DWITH_JEMALLOC=1 -DWITH_JNI=1 ..
# 4. Then build the project in debug mode (you may want to add /m[:<N>] flag to run msbuild in <N> parallel threads
# or simply /m to use all avail cores)
# msbuild rocksdb.sln
@ -27,7 +27,7 @@
#
# Linux:
#
# 1. Install a recent toolchain if you're on a older distro. C++17 required (GCC >= 7, Clang >= 5)
# 1. Install a recent toolchain such as devtoolset-3 if you're on a older distro. C++11 required.
# 2. mkdir build; cd build
# 3. cmake ..
# 4. make -j
@ -40,8 +40,6 @@ include(GoogleTest)
get_rocksdb_version(rocksdb_VERSION)
project(rocksdb
VERSION ${rocksdb_VERSION}
DESCRIPTION "An embeddable persistent key-value store for fast storage"
HOMEPAGE_URL https://rocksdb.org/
LANGUAGES CXX C ASM)
if(POLICY CMP0042)
@ -80,8 +78,21 @@ if ($ENV{CIRCLECI})
add_definitions(-DCIRCLECI)
endif()
# third-party/folly is only validated to work on Linux and Windows for now.
# So only turn it on there by default.
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows")
if(MSVC AND MSVC_VERSION LESS 1910)
# Folly does not compile with MSVC older than VS2017
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON)
endif()
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
endif()
if( NOT DEFINED CMAKE_CXX_STANDARD )
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 11)
endif()
include(CMakeDependentOption)
@ -171,6 +182,26 @@ else()
endif()
endif()
string(TIMESTAMP TS "%Y-%m-%d %H:%M:%S" UTC)
set(BUILD_DATE "${TS}" CACHE STRING "the time we first built rocksdb")
find_package(Git)
if(GIT_FOUND AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_SHA COMMAND "${GIT_EXECUTABLE}" rev-parse HEAD )
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" RESULT_VARIABLE GIT_MOD COMMAND "${GIT_EXECUTABLE}" diff-index HEAD --quiet)
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_DATE COMMAND "${GIT_EXECUTABLE}" log -1 --date=format:"%Y-%m-%d %T" --format="%ad")
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG RESULT_VARIABLE rv COMMAND "${GIT_EXECUTABLE}" symbolic-ref -q --short HEAD OUTPUT_STRIP_TRAILING_WHITESPACE)
if (rv AND NOT rv EQUAL 0)
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG COMMAND "${GIT_EXECUTABLE}" describe --tags --exact-match OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
else()
set(GIT_SHA 0)
set(GIT_MOD 1)
endif()
string(REGEX REPLACE "[^0-9a-fA-F]+" "" GIT_SHA "${GIT_SHA}")
string(REGEX REPLACE "[^0-9: /-]+" "" GIT_DATE "${GIT_DATE}")
option(WITH_MD_LIBRARY "build with MD" ON)
if(WIN32 AND MSVC)
if(WITH_MD_LIBRARY)
@ -180,12 +211,15 @@ if(WIN32 AND MSVC)
endif()
endif()
set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc)
configure_file(util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY)
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W4 /wd4127 /wd4800 /wd4996 /wd4351 /wd4100 /wd4204 /wd4324")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wextra -Wall -pthread")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wsign-compare -Wshadow -Wno-unused-parameter -Wno-unused-variable -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers -Wno-strict-aliasing -Wno-invalid-offsetof")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wsign-compare -Wshadow -Wno-unused-parameter -Wno-unused-variable -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers -Wno-strict-aliasing")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wstrict-prototypes")
endif()
@ -233,13 +267,6 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64")
endif(HAS_ARMV8_CRC)
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
CHECK_C_COMPILER_FLAG("-march=native" HAS_S390X_MARCH_NATIVE)
if(HAS_S390X_MARCH_NATIVE)
message(STATUS " HAS_S390X_MARCH_NATIVE yes")
endif(HAS_S390X_MARCH_NATIVE)
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "s390x")
option(PORTABLE "build a portable binary" OFF)
option(FORCE_SSE42 "force building with SSE4.2, even when PORTABLE=ON" OFF)
option(FORCE_AVX "force building with AVX, even when PORTABLE=ON" OFF)
@ -265,17 +292,12 @@ if(PORTABLE)
if(FORCE_AVX2)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi -mlzcnt")
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^s390x")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z196")
endif()
endif()
else()
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
else()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^s390x" AND NOT HAS_S390X_MARCH_NATIVE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z196")
elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64" AND NOT HAS_ARMV8_CRC)
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64" AND NOT HAS_ARMV8_CRC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
endif()
endif()
@ -287,7 +309,6 @@ if(NOT MSVC)
set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul")
endif()
if (NOT PORTABLE OR FORCE_SSE42)
CHECK_CXX_SOURCE_COMPILES("
#include <cstdint>
#include <nmmintrin.h>
@ -306,11 +327,10 @@ int main() {
elseif(FORCE_SSE42)
message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
endif()
endif()
# Check if -latomic is required or not
if (NOT MSVC)
set(CMAKE_REQUIRED_FLAGS "--std=c++17")
set(CMAKE_REQUIRED_FLAGS "--std=c++11")
CHECK_CXX_SOURCE_COMPILES("
#include <atomic>
std::atomic<uint64_t> x(0);
@ -327,18 +347,36 @@ int main() {
endif()
if (WITH_LIBURING)
find_package(uring)
if (uring_FOUND)
set(CMAKE_REQUIRED_FLAGS "-luring")
CHECK_CXX_SOURCE_COMPILES("
#include <liburing.h>
int main() {
struct io_uring ring;
io_uring_queue_init(1, &ring, 0);
return 0;
}
" HAS_LIBURING)
if (HAS_LIBURING)
add_definitions(-DROCKSDB_IOURING_PRESENT)
list(APPEND THIRDPARTY_LIBS uring::uring)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -luring")
endif()
endif()
# Reset the required flags
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
# thread_local is part of C++11 and later (TODO: clean up this define)
CHECK_CXX_SOURCE_COMPILES("
#if defined(_MSC_VER) && !defined(__thread)
#define __thread __declspec(thread)
#endif
int main() {
static __thread int tls;
(void)tls;
}
" HAVE_THREAD_LOCAL)
if(HAVE_THREAD_LOCAL)
add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL)
endif()
option(WITH_IOSTATS_CONTEXT "Enable IO stats context" ON)
if (NOT WITH_IOSTATS_CONTEXT)
@ -371,7 +409,7 @@ endif()
option(WITH_TSAN "build with TSAN" OFF)
if(WITH_TSAN)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread -Wl,-pie")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread -pie")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread -fPIC")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread -fPIC")
if(WITH_JEMALLOC)
@ -422,20 +460,9 @@ if (ASSERT_STATUS_CHECKED)
add_definitions(-DROCKSDB_ASSERT_STATUS_CHECKED)
endif()
# RTTI is by default AUTO which enables it in debug and disables it in release.
set(USE_RTTI AUTO CACHE STRING "Enable RTTI in builds")
set_property(CACHE USE_RTTI PROPERTY STRINGS AUTO ON OFF)
if(USE_RTTI STREQUAL "AUTO")
message(STATUS "Enabling RTTI in Debug builds only (default)")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI")
if(MSVC)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GR-")
else()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-rtti")
endif()
elseif(USE_RTTI)
message(STATUS "Enabling RTTI in all builds")
if(DEFINED USE_RTTI)
if(USE_RTTI)
message(STATUS "Enabling RTTI")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DROCKSDB_USE_RTTI")
else()
@ -444,11 +471,20 @@ else()
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GR-")
else()
message(STATUS "Disabling RTTI in all builds")
message(STATUS "Disabling RTTI in Release builds")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-rtti")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-rtti")
endif()
endif()
else()
message(STATUS "Enabling RTTI in Debug builds only (default)")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI")
if(MSVC)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GR-")
else()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-rtti")
endif()
endif()
# Used to run CI build and tests so we can run faster
option(OPTDBG "Build optimized debug build with MSVC" OFF)
@ -576,16 +612,10 @@ if(HAVE_AUXV_GETAUXVAL)
add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT)
endif()
check_cxx_symbol_exists(F_FULLFSYNC "fcntl.h" HAVE_FULLFSYNC)
if(HAVE_FULLFSYNC)
add_definitions(-DHAVE_FULLFSYNC)
endif()
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_SOURCE_DIR}/include)
if(USE_FOLLY)
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
include_directories(${PROJECT_SOURCE_DIR}/third-party/folly)
add_definitions(-DUSE_FOLLY -DFOLLY_NO_CONFIG)
endif()
find_package(Threads REQUIRED)
@ -594,11 +624,7 @@ find_package(Threads REQUIRED)
set(SOURCES
cache/cache.cc
cache/cache_entry_roles.cc
cache/cache_key.cc
cache/cache_reservation_manager.cc
cache/clock_cache.cc
cache/compressed_secondary_cache.cc
cache/fast_lru_cache.cc
cache/lru_cache.cc
cache/sharded_cache.cc
db/arena_wrapped_db_iter.cc
@ -613,7 +639,6 @@ set(SOURCES
db/blob/blob_log_format.cc
db/blob/blob_log_sequential_reader.cc
db/blob/blob_log_writer.cc
db/blob/prefetch_buffer_collection.cc
db/builder.cc
db/c.cc
db/column_family.cc
@ -682,11 +707,11 @@ set(SOURCES
env/env.cc
env/env_chroot.cc
env/env_encryption.cc
env/env_hdfs.cc
env/file_system.cc
env/file_system_tracer.cc
env/fs_remap.cc
env/mock_env.cc
env/unique_id_gen.cc
file/delete_scheduler.cc
file/file_prefetch_buffer.cc
file/file_util.cc
@ -705,7 +730,6 @@ set(SOURCES
memory/concurrent_arena.cc
memory/jemalloc_nodump_allocator.cc
memory/memkind_kmem_allocator.cc
memory/memory_allocator.cc
memtable/alloc_tracker.cc
memtable/hash_linklist_rep.cc
memtable/hash_skiplist_rep.cc
@ -781,7 +805,6 @@ set(SOURCES
table/table_factory.cc
table/table_properties.cc
table/two_level_iterator.cc
table/unique_id.cc
test_util/sync_point.cc
test_util/sync_point_impl.cc
test_util/testutil.cc
@ -799,11 +822,9 @@ set(SOURCES
trace_replay/trace_record_result.cc
trace_replay/trace_record.cc
trace_replay/trace_replay.cc
util/cleanable.cc
util/coding.cc
util/compaction_job_stats_impl.cc
util/comparator.cc
util/compression.cc
util/compression_context_cache.cc
util/concurrent_task_limiter_impl.cc
util/crc32c.cc
@ -820,29 +841,24 @@ set(SOURCES
util/thread_local.cc
util/threadpool_imp.cc
util/xxhash.cc
utilities/agg_merge/agg_merge.cc
utilities/backup/backup_engine.cc
utilities/backupable/backupable_db.cc
utilities/blob_db/blob_compaction_filter.cc
utilities/blob_db/blob_db.cc
utilities/blob_db/blob_db_impl.cc
utilities/blob_db/blob_db_impl_filesnapshot.cc
utilities/blob_db/blob_dump_tool.cc
utilities/blob_db/blob_file.cc
utilities/cache_dump_load.cc
utilities/cache_dump_load_impl.cc
utilities/cassandra/cassandra_compaction_filter.cc
utilities/cassandra/format.cc
utilities/cassandra/merge_operator.cc
utilities/checkpoint/checkpoint_impl.cc
utilities/compaction_filters.cc
utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
utilities/counted_fs.cc
utilities/debug.cc
utilities/env_mirror.cc
utilities/env_timed.cc
utilities/fault_injection_env.cc
utilities/fault_injection_fs.cc
utilities/fault_injection_secondary_cache.cc
utilities/leveldb_options/leveldb_options.cc
utilities/memory/memory_util.cc
utilities/merge_operators.cc
@ -884,7 +900,6 @@ set(SOURCES
utilities/transactions/write_unprepared_txn.cc
utilities/transactions/write_unprepared_txn_db.cc
utilities/ttl/db_ttl_impl.cc
utilities/wal_filter.cc
utilities/write_batch_with_index/write_batch_with_index.cc
utilities/write_batch_with_index/write_batch_with_index_internal.cc)
@ -902,31 +917,6 @@ list(APPEND SOURCES
utilities/transactions/lock/range/range_tree/lib/util/dbt.cc
utilities/transactions/lock/range/range_tree/lib/util/memarena.cc)
message(STATUS "ROCKSDB_PLUGINS: ${ROCKSDB_PLUGINS}")
if ( ROCKSDB_PLUGINS )
string(REPLACE " " ";" PLUGINS ${ROCKSDB_PLUGINS})
foreach (plugin ${PLUGINS})
add_subdirectory("plugin/${plugin}")
foreach (src ${${plugin}_SOURCES})
list(APPEND SOURCES plugin/${plugin}/${src})
set_source_files_properties(
plugin/${plugin}/${src}
PROPERTIES COMPILE_FLAGS "${${plugin}_COMPILE_FLAGS}")
endforeach()
foreach (path ${${plugin}_INCLUDE_PATHS})
include_directories(${path})
endforeach()
foreach (lib ${${plugin}_LIBS})
list(APPEND THIRDPARTY_LIBS ${lib})
endforeach()
foreach (link_path ${${plugin}_LINK_PATHS})
link_directories(AFTER ${link_path})
endforeach()
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${${plugin}_CMAKE_SHARED_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${${plugin}_CMAKE_EXE_LINKER_FLAGS}")
endforeach()
endif()
if(HAVE_SSE42 AND NOT MSVC)
set_source_files_properties(
util/crc32c.cc
@ -970,12 +960,13 @@ else()
env/io_posix.cc)
endif()
if(USE_FOLLY)
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
list(APPEND SOURCES
third-party/folly/folly/container/detail/F14Table.cpp
third-party/folly/folly/lang/SafeAssert.cpp
third-party/folly/folly/lang/ToAscii.cpp
third-party/folly/folly/ScopeGuard.cpp)
third-party/folly/folly/detail/Futex.cpp
third-party/folly/folly/synchronization/AtomicNotification.cpp
third-party/folly/folly/synchronization/DistributedMutex.cpp
third-party/folly/folly/synchronization/ParkingLot.cpp
third-party/folly/folly/synchronization/WaitOptions.cpp)
endif()
set(ROCKSDB_STATIC_LIB rocksdb${ARTIFACT_SUFFIX})
@@ -983,6 +974,12 @@ set(ROCKSDB_SHARED_LIB rocksdb-shared${ARTIFACT_SUFFIX})
option(ROCKSDB_BUILD_SHARED "Build shared versions of the RocksDB libraries" ON)
option(WITH_LIBRADOS "Build with librados" OFF)
if(WITH_LIBRADOS)
list(APPEND SOURCES
utilities/env_librados.cc)
list(APPEND THIRDPARTY_LIBS rados)
endif()
if(WIN32)
set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib)
@@ -990,60 +987,6 @@ else()
set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT})
endif()
set(ROCKSDB_PLUGIN_EXTERNS "")
set(ROCKSDB_PLUGIN_BUILTINS "")
message(STATUS "ROCKSDB PLUGINS TO BUILD ${ROCKSDB_PLUGINS}")
list(APPEND PLUGINS ${ROCKSDB_PLUGINS})
foreach(PLUGIN IN LISTS PLUGINS)
set(PLUGIN_ROOT "${CMAKE_SOURCE_DIR}/plugin/${PLUGIN}/")
message("including rocksb plugin ${PLUGIN_ROOT}")
set(PLUGINMKFILE "${PLUGIN_ROOT}${PLUGIN}.mk")
if (NOT EXISTS ${PLUGINMKFILE})
message(FATAL_ERROR "Missing plugin makefile: ${PLUGINMKFILE}")
endif()
file(READ ${PLUGINMKFILE} PLUGINMK)
string(REGEX MATCH "SOURCES = ([^\n]*)" FOO ${PLUGINMK})
set(MK_SOURCES ${CMAKE_MATCH_1})
separate_arguments(MK_SOURCES)
foreach(MK_FILE IN LISTS MK_SOURCES)
list(APPEND SOURCES "${PLUGIN_ROOT}${MK_FILE}")
endforeach()
string(REGEX MATCH "_FUNC = ([^\n]*)" FOO ${PLUGINMK})
if (NOT ${CMAKE_MATCH_1} STREQUAL "")
string(APPEND ROCKSDB_PLUGIN_BUILTINS "{\"${PLUGIN}\", " ${CMAKE_MATCH_1} "},")
string(APPEND ROCKSDB_PLUGIN_EXTERNS "int " ${CMAKE_MATCH_1} "(ROCKSDB_NAMESPACE::ObjectLibrary&, const std::string&); ")
endif()
string(REGEX MATCH "_LIBS = ([^\n]*)" FOO ${PLUGINMK})
if (NOT ${CMAKE_MATCH_1} STREQUAL "")
list(APPEND THIRDPARTY_LIBS "${CMAKE_MATCH_1}")
endif()
message("THIRDPARTY_LIBS=${THIRDPARTY_LIBS}")
#TODO: We need to set any compile/link-time flags and add any link libraries
endforeach()
string(TIMESTAMP TS "%Y-%m-%d %H:%M:%S" UTC)
set(BUILD_DATE "${TS}" CACHE STRING "the time we first built rocksdb")
find_package(Git)
if(GIT_FOUND AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_SHA COMMAND "${GIT_EXECUTABLE}" rev-parse HEAD )
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" RESULT_VARIABLE GIT_MOD COMMAND "${GIT_EXECUTABLE}" diff-index HEAD --quiet)
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_DATE COMMAND "${GIT_EXECUTABLE}" log -1 --date=format:"%Y-%m-%d %T" --format="%ad")
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG RESULT_VARIABLE rv COMMAND "${GIT_EXECUTABLE}" symbolic-ref -q --short HEAD OUTPUT_STRIP_TRAILING_WHITESPACE)
if (rv AND NOT rv EQUAL 0)
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG COMMAND "${GIT_EXECUTABLE}" describe --tags --exact-match OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
else()
set(GIT_SHA 0)
set(GIT_MOD 1)
endif()
string(REGEX REPLACE "[^0-9a-fA-F]+" "" GIT_SHA "${GIT_SHA}")
string(REGEX REPLACE "[^0-9: /-]+" "" GIT_DATE "${GIT_DATE}")
set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc)
configure_file(util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY)
add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES} ${BUILD_VERSION_CC})
target_link_libraries(${ROCKSDB_STATIC_LIB} PRIVATE
${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
@@ -1123,12 +1066,6 @@ if(NOT WIN32 OR ROCKSDB_INSTALL_ON_WINDOWS)
COMPATIBILITY SameMajorVersion
)
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}.pc.in
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc
@ONLY
)
install(DIRECTORY include/rocksdb COMPONENT devel DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
install(DIRECTORY "${PROJECT_SOURCE_DIR}/cmake/modules" COMPONENT devel DESTINATION ${package_config_destination})
@@ -1167,13 +1104,6 @@ if(NOT WIN32 OR ROCKSDB_INSTALL_ON_WINDOWS)
COMPONENT devel
DESTINATION ${package_config_destination}
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc
COMPONENT devel
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
)
endif()
option(WITH_ALL_TESTS "Build all test, rather than a small subset" ON)
@@ -1193,9 +1123,7 @@ if(WITH_TESTS)
)
if(WITH_ALL_TESTS)
list(APPEND TESTS
cache/cache_reservation_manager_test.cc
cache/cache_test.cc
cache/compressed_secondary_cache_test.cc
cache/lru_cache_test.cc
db/blob/blob_counting_iterator_test.cc
db/blob/blob_file_addition_test.cc
@@ -1225,7 +1153,6 @@ if(WITH_TESTS)
db/db_compaction_filter_test.cc
db/db_compaction_test.cc
db/db_dynamic_level_test.cc
db/db_encryption_test.cc
db/db_flush_test.cc
db/db_inplace_update_test.cc
db/db_io_failure_test.cc
@@ -1240,7 +1167,6 @@ if(WITH_TESTS)
db/db_options_test.cc
db/db_properties_test.cc
db/db_range_del_test.cc
db/db_rate_limiter_test.cc
db/db_secondary_test.cc
db/db_sst_test.cc
db/db_statistics_test.cc
@@ -1252,7 +1178,6 @@ if(WITH_TESTS)
db/db_universal_compaction_test.cc
db/db_wal_test.cc
db/db_with_timestamp_compaction_test.cc
db/db_write_buffer_manager_test.cc
db/db_write_test.cc
db/dbformat_test.cc
db/deletefile_test.cc
@@ -1264,7 +1189,6 @@ if(WITH_TESTS)
db/file_indexer_test.cc
db/filename_test.cc
db/flush_job_test.cc
db/import_column_family_test.cc
db/listener_test.cc
db/log_test.cc
db/manual_compaction_test.cc
@@ -1298,7 +1222,7 @@ if(WITH_TESTS)
logging/env_logger_test.cc
logging/event_logger_test.cc
memory/arena_test.cc
memory/memory_allocator_test.cc
memory/memkind_kmem_allocator_test.cc
memtable/inlineskiplist_test.cc
memtable/skiplist_test.cc
memtable/write_buffer_manager_test.cc
@@ -1353,15 +1277,13 @@ if(WITH_TESTS)
util/thread_list_test.cc
util/thread_local_test.cc
util/work_queue_test.cc
utilities/agg_merge/agg_merge_test.cc
utilities/backup/backup_engine_test.cc
utilities/backupable/backupable_db_test.cc
utilities/blob_db/blob_db_test.cc
utilities/cassandra/cassandra_functional_test.cc
utilities/cassandra/cassandra_format_test.cc
utilities/cassandra/cassandra_row_merge_test.cc
utilities/cassandra/cassandra_serialize_test.cc
utilities/checkpoint/checkpoint_test.cc
utilities/env_timed_test.cc
utilities/memory/memory_test.cc
utilities/merge_operators/string_append/stringappend_test.cc
utilities/object_registry_test.cc
@@ -1375,21 +1297,25 @@ if(WITH_TESTS)
utilities/transactions/optimistic_transaction_test.cc
utilities/transactions/transaction_test.cc
utilities/transactions/lock/point/point_lock_manager_test.cc
utilities/transactions/write_committed_transaction_ts_test.cc
utilities/transactions/write_prepared_transaction_test.cc
utilities/transactions/write_unprepared_transaction_test.cc
utilities/transactions/lock/range/range_locking_test.cc
utilities/ttl/ttl_test.cc
utilities/util_merge_operators_test.cc
utilities/write_batch_with_index/write_batch_with_index_test.cc
)
endif()
if(WITH_LIBRADOS)
list(APPEND TESTS utilities/env_librados_test.cc)
endif()
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
list(APPEND TESTS third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp)
endif()
set(TESTUTIL_SOURCE
db/db_test_util.cc
monitoring/thread_status_updater_debug.cc
table/mock_table.cc
utilities/agg_merge/test_agg_merge.cc
utilities/cassandra/test_utils.cc
)
enable_testing()
@@ -1420,6 +1346,10 @@ if(WITH_TESTS)
gtest_discover_tests(${exename} DISCOVERY_TIMEOUT 120)
add_dependencies(check ${exename}${ARTIFACT_SUFFIX})
endif()
if("${exename}" MATCHES "env_librados_test")
# env_librados_test.cc uses librados directly
target_link_libraries(${exename}${ARTIFACT_SUFFIX} rados)
endif()
endforeach(sourcefile ${TESTS})
if(WIN32)

View File

@@ -1,4 +1,4 @@
# RocksDB default options change log (NO LONGER MAINTAINED)
# RocksDB default options change log
## Unreleased
* delayed_write_rate takes the rate given by rate_limiter if not specified.

View File

@@ -1,381 +1,11 @@
# Rocksdb Change Log
## Unreleased
## 6.24.2 (2021-09-16)
### Bug Fixes
* Fixed a bug where manual flush would block forever even though flush options had wait=false.
* Fixed a bug where RocksDB could corrupt DBs with `avoid_flush_during_recovery == true` by removing valid WALs, leading to `Status::Corruption` with a message like "SST file is ahead of WALs" when attempting to reopen.
* Fixed a bug in the async_io path where FilePrefetchBuffer could read an incorrect length of data if data was consumed from two populated buffers and a request for more data was sent.
* Fixed a CompactionFilter bug. The compaction filter used `Delete` to remove keys even when the keys should have been removed with `SingleDelete`. Mixing `Delete` and `SingleDelete` may cause undefined behavior.
* Fixed a bug which might cause a process crash when an I/O error happens while reading an index block in MultiGet().
### New Features
* DB::GetLiveFilesStorageInfo is ready for production use.
* Add new stats PREFETCHED_BYTES_DISCARDED, which records the number of prefetched bytes discarded by the RocksDB FilePrefetchBuffer on destruction, and POLL_WAIT_MICROS, which records the wait time for FS::Poll API completion.
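A minimal sketch of reading the new ticker, assuming `options.statistics` was populated with `CreateDBStatistics()` before opening the DB (ticker names per the entry above):

```cpp
#include <memory>
#include "rocksdb/statistics.h"

// Minimal sketch, assuming stats came from options.statistics =
// CreateDBStatistics() set before DB::Open.
uint64_t PrefetchedBytesDiscarded(
    const std::shared_ptr<ROCKSDB_NAMESPACE::Statistics>& stats) {
  // Prefetched bytes that FilePrefetchBuffer threw away on destruction.
  return stats->getTickerCount(ROCKSDB_NAMESPACE::PREFETCHED_BYTES_DISCARDED);
}
```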
### Public API changes
* Add rollback_deletion_type_callback to TransactionDBOptions so that write-prepared transactions know whether to issue a Delete or SingleDelete to cancel a previous key written during prior prepare phase. The PR aims to prevent mixing SingleDeletes and Deletes for the same key, which can lead to undefined behavior for write-prepared transactions.
* EXPERIMENTAL: Add new API AbortIO in file_system to abort the read requests submitted asynchronously.
* CompactionFilter::Decision has a new value: kRemoveWithSingleDelete. If CompactionFilter returns this decision, then CompactionIterator will use `SingleDelete` to mark a key as removed.
* Renamed CompactionFilter::Decision::kRemoveWithSingleDelete to kPurge since the latter sounds more general and hides the implementation details of how the compaction iterator handles keys (a sketch of a filter returning this decision follows this list).
* Added ability to specify functions for Prepare and Validate to OptionsTypeInfo. Added methods to OptionTypeInfo to set the functions via an API. These methods are intended for RocksDB plugin developers for configuration management.
* Added a new immutable DB option, enforce_single_del_contracts. If set to false (default is true), compaction will NOT fail due to a single delete followed by a delete for the same key. The purpose of this temporary option is to help existing use cases migrate.
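As referenced in the kPurge entry above, here is a minimal sketch of a compaction filter returning the new decision. The class and enum names follow these entries; the filter itself is illustrative, not a complete integration:

```cpp
#include "rocksdb/compaction_filter.h"

// Minimal sketch: purge keys with empty values using SingleDelete
// semantics (Decision::kPurge) instead of Delete.
class PurgeEmptyValuesFilter : public ROCKSDB_NAMESPACE::CompactionFilter {
 public:
  Decision FilterV2(int /*level*/, const ROCKSDB_NAMESPACE::Slice& /*key*/,
                    ValueType value_type,
                    const ROCKSDB_NAMESPACE::Slice& existing_value,
                    std::string* /*new_value*/,
                    std::string* /*skip_until*/) const override {
    if (value_type == ValueType::kValue && existing_value.empty()) {
      return Decision::kPurge;  // removed with SingleDelete, not Delete
    }
    return Decision::kKeep;
  }
  const char* Name() const override { return "PurgeEmptyValuesFilter"; }
};
```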
### Bug Fixes
* RocksDB used to call the FileSystem::Poll API during FilePrefetchBuffer destruction, which impacted performance because it waited for the completion of read requests that were no longer needed. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue.
* Fixed unnecessary block cache contention when queries within a MultiGet batch and across parallel batches access the same data block, which previously could cause severely degraded performance in this unusual case. (In more typical MultiGet cases, this fix is expected to yield a small or negligible performance improvement.)
### Behavior changes
* Enforce the existing contract of SingleDelete so that SingleDelete cannot be mixed with Delete, because doing so leads to undefined behavior. Fixed a number of unit tests that violated the contract but happened to pass.
* ldb `--try_load_options` now defaults to true if `--db` is specified and not creating a new DB; the user can still explicitly disable that with `--try_load_options=false` (or explicitly enable it with `--try_load_options`).
## 7.2.0 (04/15/2022)
### Bug Fixes
* Fixed a bug which caused RocksDB to fail when the DB was accessed using a UNC path.
* Fixed a race condition when 2PC is disabled and WAL tracking in the MANIFEST is enabled. The race condition is between two background flush threads trying to install flush results, causing a WAL deletion not tracked in the MANIFEST. A future DB open may fail.
* Fixed a heap use-after-free race with DropColumnFamily.
* Fixed a bug that `rocksdb.read.block.compaction.micros` could not track compaction stats (#9722).
* Fixed `file_type`, `relative_filename` and `directory` fields returned by `GetLiveFilesMetaData()`, which were added in inheriting from `FileStorageInfo`.
* Fixed a bug affecting `track_and_verify_wals_in_manifest`. Without the fix, application may see "open error: Corruption: Missing WAL with log number" while trying to open the db. The corruption is a false alarm but prevents DB open (#9766).
* Fixed a segfault in FilePrefetchBuffer with async_io, caused by not waiting for pending jobs to complete on destruction.
* Fixed the ERROR_HANDLER_AUTORESUME_RETRY_COUNT stat, whose value was set wrong in portal.h.
* Fixed a bug for non-TransactionDB with avoid_flush_during_recovery = true and for TransactionDB, where in case of crash, min_log_number_to_keep may not change on recovery, and persisting a new MANIFEST with advanced log_numbers for some column families results in a "column family inconsistency" error on the second recovery. As a solution, the corrupted WALs whose numbers are larger than the corrupted WAL and smaller than the new WAL are moved to the archive folder.
* Fixed a bug in RocksDB DB::Open() which may create and write to two new MANIFEST files even before recovery succeeds. Now writes to MANIFEST are persisted only after recovery is successful.
### New Features
* For db_bench, when --seed=0 or --seed is not set, the current time is used as the seed value. Previously the value 1000 was used.
* For db_bench, when --benchmark lists multiple tests and each test uses a seed for an RNG, the seeds across tests will no longer be repeated.
* Added an option to dynamically charge an updating estimated memory usage of the block-based table reader to the block cache if a block cache is available. To enable this feature, set `BlockBasedTableOptions::reserve_table_reader_memory = true`.
* Add new stat ASYNC_READ_BYTES that counts the number of bytes read during async read calls; users can check it to see whether the async code path is being used by RocksDB's internal automatic prefetching for sequential reads.
* Enable async prefetching if ReadOptions.readahead_size is set along with ReadOptions.async_io in FilePrefetchBuffer.
* Add event listener support on remote compaction compactor side.
* Added a dedicated integer DB property `rocksdb.live-blob-file-garbage-size` that exposes the total amount of garbage in the blob files in the current version.
* RocksDB does internal auto prefetching if it notices sequential reads. It starts with readahead size `initial_auto_readahead_size` which now can be configured through BlockBasedTableOptions.
* Add a merge operator that allows users to register specific aggregation functions so that they can do aggregation using different aggregation types for different keys. See comments in include/rocksdb/utilities/agg_merge.h for actual usage. The feature is experimental, the format is subject to change, and we won't provide a migration tool.
* Meta-internal / Experimental: Improve CPU performance by replacing many uses of std::unordered_map with folly::F14FastMap when RocksDB is compiled together with Folly.
* Experimental: Add CompressedSecondaryCache, a concrete implementation of rocksdb::SecondaryCache, that integrates with compression libraries (e.g. LZ4) to hold compressed blocks.
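A minimal sketch of wiring the experimental CompressedSecondaryCache into an LRU block cache. The factory and option-struct names are assumptions based on this entry, and the API is experimental and may change:

```cpp
#include <memory>
#include "rocksdb/cache.h"

// Minimal sketch, assuming the experimental factory/option names from the
// entry above (subject to change).
std::shared_ptr<ROCKSDB_NAMESPACE::Cache> MakeBlockCache() {
  ROCKSDB_NAMESPACE::CompressedSecondaryCacheOptions sec_opts;
  sec_opts.capacity = 256 << 20;  // 256MB of compressed blocks

  ROCKSDB_NAMESPACE::LRUCacheOptions lru_opts;
  lru_opts.capacity = 512 << 20;  // 512MB primary block cache
  lru_opts.secondary_cache =
      ROCKSDB_NAMESPACE::NewCompressedSecondaryCache(sec_opts);
  return ROCKSDB_NAMESPACE::NewLRUCache(lru_opts);
}
```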
### Behavior changes
* Disallow usage of commit-time-write-batch for write-prepared/write-unprepared transactions if TransactionOptions::use_only_the_last_commit_time_batch_for_recovery is false to prevent two (or more) uncommitted versions of the same key in the database. Otherwise, bottommost compaction may violate the internal key uniqueness invariant of SSTs if the sequence numbers of both internal keys are zeroed out (#9794).
* Make DB::GetUpdatesSince() return NotSupported early for write-prepared/write-unprepared transactions, as the API contract indicates.
### Public API changes
* Exposed APIs to examine results of block cache stats collections in a structured way. In particular, users of `GetMapProperty()` with property `kBlockCacheEntryStats` can now use the functions in `BlockCacheEntryStatsMapKeys` to find stats in the map (see the sketch after this list).
* Add `fail_if_not_bottommost_level` to IngestExternalFileOptions so that ingestion will fail if the file(s) cannot be ingested to the bottommost level.
* Add output parameter `is_in_sec_cache` to `SecondaryCache::Lookup()`. It is to indicate whether the handle is possibly erased from the secondary cache after the Lookup.
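A minimal sketch of the structured lookup referenced above, assuming an open `DB* db` and the helper names from that entry:

```cpp
#include <cstdio>
#include <map>
#include <string>
#include "rocksdb/cache.h"
#include "rocksdb/db.h"

// Minimal sketch: report how many bytes of data blocks the block cache
// currently holds, via the structured stats map.
void LogDataBlockBytes(ROCKSDB_NAMESPACE::DB* db) {
  std::map<std::string, std::string> stats;
  if (db->GetMapProperty(
          ROCKSDB_NAMESPACE::DB::Properties::kBlockCacheEntryStats, &stats)) {
    const std::string key =
        ROCKSDB_NAMESPACE::BlockCacheEntryStatsMapKeys::UsedBytes(
            ROCKSDB_NAMESPACE::CacheEntryRole::kDataBlock);
    fprintf(stderr, "data blocks in cache: %s bytes\n", stats[key].c_str());
  }
}
```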
## 7.1.0 (03/23/2022)
### New Features
* Allow WriteBatchWithIndex to index a WriteBatch that includes keys with user-defined timestamps. The index itself does not have timestamp.
* Add support for user-defined timestamps to write-committed transaction without API change. The `TransactionDB` layer APIs do not allow timestamps because we require that all user-defined-timestamps-aware operations go through the `Transaction` APIs.
* Added BlobDB options to `ldb`
* `BlockBasedTableOptions::detect_filter_construct_corruption` can now be dynamically configured using `DB::SetOptions`.
* Automatically recover from retryable read IO errors during background flush/compaction.
* Experimental support for preserving file Temperatures through backup and restore, and for updating DB metadata for outside changes to file Temperature (`UpdateManifestForFilesState` or `ldb update_manifest --update_temperatures`).
* Experimental support for async_io in ReadOptions which is used by FilePrefetchBuffer to prefetch some of the data asynchronously, if reads are sequential and auto readahead is enabled by rocksdb internally.
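A minimal sketch of opting a scan into that experimental path, using the option names above (only effective on file systems that support it):

```cpp
#include "rocksdb/options.h"

// Minimal sketch: request async prefetching for a sequential scan.
ROCKSDB_NAMESPACE::ReadOptions MakeScanOptions() {
  ROCKSDB_NAMESPACE::ReadOptions ro;
  ro.readahead_size = 2 << 20;  // start prefetching with 2MB readahead
  ro.async_io = true;           // EXPERIMENTAL
  return ro;
}
```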
### Bug Fixes
* Fixed a major performance bug in which Bloom filters generated by pre-7.0 releases are not read by early 7.0.x releases (and vice-versa) due to changes to FilterPolicy::Name() in #9590. This can severely impact read performance and read I/O on upgrade or downgrade with existing DB, but not data correctness.
* Fixed a data race on `versions_` between `DBImpl::ResumeImpl()` and threads waiting for recovery to complete (#9496)
* Fixed a bug caused by race among flush, incoming writes and taking snapshots. Queries to snapshots created with these race condition can return incorrect result, e.g. resurfacing deleted data.
* Fixed a bug that DB flush used `options.compression` even when `options.compression_per_level` was set.
* Fixed a bug that DisableManualCompaction may assert when disabling an unscheduled manual compaction.
* Fix a race condition when cancelling a manual compaction with `DisableManualCompaction`. Also, DB close can cancel the manual compaction thread.
* Fixed a potential timer crash when opening and closing a DB concurrently.
* Fixed a race condition for `alive_log_files_` in non-two-write-queues mode. The race is between the write_thread_ in WriteToWAL() and another thread executing `FindObsoleteFiles()`. The race condition will be caught if `__glibcxx_requires_nonempty` is enabled.
* Fixed a bug that `Iterator::Refresh()` read stale keys after DeleteRange() was performed.
* Fixed a race condition when disabling and re-enabling manual compaction.
* Fixed automatic error recovery failure in atomic flush.
* Fixed a race condition when mmapping a WritableFile on POSIX.
### Public API changes
* Added pure virtual FilterPolicy::CompatibilityName(), which is needed for fixing major performance bug involving FilterPolicy naming in SST metadata without affecting Customizable aspect of FilterPolicy. This change only affects those with their own custom or wrapper FilterPolicy classes.
* `options.compression_per_level` is dynamically changeable with `SetOptions()`.
* Added `WriteOptions::rate_limiter_priority`. When set to something other than `Env::IO_TOTAL`, the internal rate limiter (`DBOptions::rate_limiter`) will be charged at the specified priority for writes associated with the API to which the `WriteOptions` was provided. Currently the support covers automatic WAL flushes, which happen during live updates (`Put()`, `Write()`, `Delete()`, etc.) when `WriteOptions::disableWAL == false` and `DBOptions::manual_wal_flush == false`. (See the sketch after this list.)
* Add DB::OpenAndTrimHistory API. This API will open the DB and trim data to the timestamp specified by trim_ts (data with a timestamp larger than the specified trim bound will be removed). This API should only be used for recovery of timestamp-enabled column families. If a column family doesn't have timestamp enabled, this API won't trim any data on that column family. This API is not compatible with the avoid_flush_during_recovery option.
* Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect.
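A minimal sketch of the `rate_limiter_priority` usage referenced above, assuming the DB was opened with `DBOptions::rate_limiter` configured and the covered WAL-flush conditions hold:

```cpp
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"

// Minimal sketch: charge this write's automatic WAL flush I/O to the
// DB's rate limiter at user priority.
ROCKSDB_NAMESPACE::Status RateLimitedPut(
    ROCKSDB_NAMESPACE::DB* db, const ROCKSDB_NAMESPACE::Slice& key,
    const ROCKSDB_NAMESPACE::Slice& value) {
  ROCKSDB_NAMESPACE::WriteOptions wo;
  wo.rate_limiter_priority = ROCKSDB_NAMESPACE::Env::IO_USER;
  return db->Put(wo, key, value);
}
```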
## 7.0.0 (02/20/2022)
### Bug Fixes
* Fixed a major bug in which batched MultiGet could return old values for keys deleted by DeleteRange when memtable Bloom filter is enabled (memtable_prefix_bloom_size_ratio > 0). (The fix includes a substantial MultiGet performance improvement in the unusual case of both memtable_whole_key_filtering and prefix_extractor.)
* Fixed more cases of EventListener::OnTableFileCreated called with OK status, file_size==0, and no SST file kept. Now the status is Aborted.
* Fixed a read-after-free bug in `DB::GetMergeOperands()`.
* Fix a data loss bug for 2PC write-committed transaction caused by concurrent transaction commit and memtable switch (#9571).
* Fixed NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, NUM_DATA_BLOCKS_READ_PER_LEVEL, and NUM_SST_READ_PER_LEVEL stats to be reported once per MultiGet batch per level.
### Performance Improvements
* Mitigated the overhead of building the file location hash table used by the online LSM tree consistency checks, which can improve performance for certain workloads (see #9351).
* Switched to using a sorted `std::vector` instead of `std::map` for storing the metadata objects for blob files, which can improve performance for certain workloads, especially when the number of blob files is high.
* DisableManualCompaction() no longer has to wait for a scheduled manual compaction to be executed in the thread pool in order to cancel the job.
### Public API changes
* Require C++17 compatible compiler (GCC >= 7, Clang >= 5, Visual Studio >= 2017) for compiling RocksDB and any code using RocksDB headers. See #9388.
* Added `ReadOptions::rate_limiter_priority`. When set to something other than `Env::IO_TOTAL`, the internal rate limiter (`DBOptions::rate_limiter`) will be charged at the specified priority for file reads associated with the API to which the `ReadOptions` was provided.
* Remove HDFS support from main repo.
* Remove librados support from main repo.
* Remove obsolete backupable_db.h and type alias `BackupableDBOptions`. Use backup_engine.h and `BackupEngineOptions`. Similar renamings are in the C and Java APIs.
* Removed obsolete utility_db.h and `UtilityDB::OpenTtlDB`. Use db_ttl.h and `DBWithTTL::Open`.
* Remove deprecated API DB::AddFile from main repo.
* Remove deprecated API ObjectLibrary::Register() and the (now obsolete) Regex public API. Use ObjectLibrary::AddFactory() with PatternEntry instead.
* Remove deprecated option DBOption::table_cache_remove_scan_count_limit.
* Remove deprecated API AdvancedColumnFamilyOptions::soft_rate_limit.
* Remove deprecated API AdvancedColumnFamilyOptions::hard_rate_limit.
* Remove deprecated API DBOption::base_background_compactions.
* Remove deprecated API DBOptions::purge_redundant_kvs_while_flush.
* Remove deprecated overloads of API DB::CompactRange.
* Remove deprecated option DBOptions::skip_log_error_on_recovery.
* Remove ReadOptions::iter_start_seqnum which has been deprecated.
* Remove DBOptions::preserved_deletes and DB::SetPreserveDeletesSequenceNumber().
* Remove deprecated API AdvancedColumnFamilyOptions::rate_limit_delay_max_milliseconds.
* Removed timestamp from WriteOptions. Accordingly, added to DB APIs Put, Delete, SingleDelete, etc. accepting an additional argument 'timestamp'. Added Put, Delete, SingleDelete, etc. to WriteBatch accepting an additional argument 'timestamp'. Removed WriteBatch::AssignTimestamps(vector<Slice>) API. Renamed WriteBatch::AssignTimestamp() to WriteBatch::UpdateTimestamps() with clarified comments. (See the sketch at the end of this list.)
* Changed type of cache buffer passed to `Cache::CreateCallback` from `void*` to `const void*`.
* Significant updates to FilterPolicy-related APIs and configuration:
* Remove public API support for deprecated, inefficient block-based filter (use_block_based_builder=true).
* Old code and configuration strings that would enable it now quietly enable full filters instead, though any built-in FilterPolicy can still read block-based filters. This includes changing the longstanding default behavior of the Java API.
* Remove deprecated FilterPolicy::CreateFilter() and FilterPolicy::KeyMayMatch()
* Remove `rocksdb_filterpolicy_create()` from C API, as the only C API support for custom filter policies is now obsolete.
* If temporary memory usage in full filter creation is a problem, consider using partitioned filters, smaller SST files, or setting reserve_table_builder_memory=true.
* Remove support for "filter_policy=experimental_ribbon" configuration
string. Use something like "filter_policy=ribbonfilter:10" instead.
* Allow configuration string like "filter_policy=bloomfilter:10" without
bool, to minimize acknowledgement of obsolete block-based filter.
* Made FilterPolicy Customizable. Configuration of filter_policy is now accurately saved in OPTIONS file and can be loaded with LoadOptionsFromFile. (Loading an OPTIONS file generated by a previous version only enables reading and using existing filters, not generating new filters. Previously, no filter_policy would be configured from a saved OPTIONS file.)
* Change meaning of nullptr return from GetBuilderWithContext() from "use
block-based filter" to "generate no filter in this case."
* Also, when user specifies bits_per_key < 0.5, we now round this down
to "no filter" because we expect a filter with >= 80% FP rate is
unlikely to be worth the CPU cost of accessing it (esp with
cache_index_and_filter_blocks=1 or partition_filters=1).
* bits_per_key >= 0.5 and < 1.0 is still rounded up to 1.0 (for 62% FP
rate)
* Remove class definitions for FilterBitsBuilder and FilterBitsReader from
public API, so these can evolve more easily as implementation details.
Custom FilterPolicy can still decide what kind of built-in filter to use
under what conditions.
* Also removed deprecated functions
* FilterPolicy::GetFilterBitsBuilder()
* NewExperimentalRibbonFilterPolicy()
* Remove default implementations of
* FilterPolicy::GetBuilderWithContext()
* Remove default implementation of Name() from FileSystemWrapper.
* Rename `SizeApproximationOptions.include_memtabtles` to `SizeApproximationOptions.include_memtables`.
* Remove deprecated option DBOptions::max_mem_compaction_level.
* Return Status::InvalidArgument from ObjectRegistry::NewObject if a factory exists but the object could not be created (returns NotFound if the factory is missing).
* Remove deprecated overloads of API DB::GetApproximateSizes.
* Remove deprecated option DBOptions::new_table_reader_for_compaction_inputs.
* Add Transaction::SetReadTimestampForValidation() and Transaction::SetCommitTimestamp(). Default impl returns NotSupported().
* Add support for decimal patterns to ObjectLibrary::PatternEntry
* Remove deprecated remote compaction APIs `CompactionService::Start()` and `CompactionService::WaitForComplete()`. Please use `CompactionService::StartV2()`, `CompactionService::WaitForCompleteV2()` instead, which provides the same information plus extra data like priority, db_id, etc.
* `ColumnFamilyOptions::OldDefaults` and `DBOptions::OldDefaults` are marked deprecated, as they are no longer maintained.
* Add subcompaction callback APIs: `OnSubcompactionBegin()` and `OnSubcompactionCompleted()`.
* Add file Temperature information to `FileOperationInfo` in event listener API.
* Change the type of SizeApproximationFlags from enum to enum class. Also update the signature of DB::GetApproximateSizes API from uint8_t to SizeApproximationFlags.
* Add Temperature hints information from RocksDB in API `NewSequentialFile()`. Backup and checkpoint operations need to open the source files with `NewSequentialFile()`, which will have the temperature hints. Other operations are not covered.
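As referenced in the timestamp entry above, here is a minimal sketch of the new timestamp-aware `Put`. It assumes a column family created with a timestamp-aware comparator such as `BytewiseComparatorWithU64Ts()` (8-byte timestamps); the raw-byte encoding below additionally assumes a little-endian machine:

```cpp
#include <cstdint>
#include <string>
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"

// Minimal sketch: write a key with an explicit 8-byte timestamp to a
// timestamp-enabled column family.
ROCKSDB_NAMESPACE::Status TimestampedPut(
    ROCKSDB_NAMESPACE::DB* db, ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf,
    const ROCKSDB_NAMESPACE::Slice& key,
    const ROCKSDB_NAMESPACE::Slice& value, uint64_t ts) {
  // Simplified little-endian encoding of the timestamp.
  std::string ts_buf(reinterpret_cast<const char*>(&ts), sizeof(ts));
  return db->Put(ROCKSDB_NAMESPACE::WriteOptions(), cf, key,
                 ROCKSDB_NAMESPACE::Slice(ts_buf), value);
}
```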
### Behavior Changes
* Disallow the combination of DBOptions.use_direct_io_for_flush_and_compaction == true and DBOptions.writable_file_max_buffer_size == 0. This combination can cause WritableFileWriter::Append() to loop forever, and it does not make much sense in direct IO.
* `ReadOptions::total_order_seek` no longer affects `DB::Get()`. The original motivation for this interaction has been obsolete since RocksDB has been able to detect whether the current prefix extractor is compatible with that used to generate table files, probably RocksDB 5.14.0.
### New Features
* Introduced an option `BlockBasedTableOptions::detect_filter_construct_corruption` for detecting corruption during Bloom Filter (format_version >= 5) and Ribbon Filter construction.
* Improved the SstDumpTool to read the comparator from table properties and use it to read the SST File.
* Extended the column family statistics in the info log so the total amount of garbage in the blob files and the blob file space amplification factor are also logged. Also exposed the blob file space amp via the `rocksdb.blob-stats` DB property (see the sketch after this list).
* Introduced the API rocksdb_create_dir_if_missing in c.h that calls underlying file system's CreateDirIfMissing API to create the directory.
* Added last level and non-last level read statistics: `LAST_LEVEL_READ_*`, `NON_LAST_LEVEL_READ_*`.
* Experimental: Add support for new APIs ReadAsync in FSRandomAccessFile that reads the data asynchronously and Poll API in FileSystem that checks if requested read request has completed or not. ReadAsync takes a callback function. Poll API checks for completion of read IO requests and should call callback functions to indicate completion of read requests.
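A minimal sketch of reading the blob statistics property referenced above, assuming an open `DB* db`:

```cpp
#include <cstdio>
#include <string>
#include "rocksdb/db.h"

// Minimal sketch: dump the multi-line blob stats, including the space
// amplification factor, via the DB property named in the entry above.
void PrintBlobStats(ROCKSDB_NAMESPACE::DB* db) {
  std::string stats;
  if (db->GetProperty("rocksdb.blob-stats", &stats)) {
    fprintf(stderr, "%s\n", stats.c_str());
  }
}
```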
## 6.29.0 (01/21/2022)
Note: The next release will be major release 7.0. See https://github.com/facebook/rocksdb/issues/9390 for more info.
### Public API change
* Added values to `TraceFilterType`: `kTraceFilterIteratorSeek`, `kTraceFilterIteratorSeekForPrev`, and `kTraceFilterMultiGet`. They can be set in `TraceOptions` to filter out the operation types after which they are named.
* Added `TraceOptions::preserve_write_order`. When enabled it guarantees write records are traced in the same order they are logged to WAL and applied to the DB. By default it is disabled (false) to match the legacy behavior and prevent regression.
* Made the Env class extend the Customizable class. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
* `Options::OldDefaults` is marked deprecated, as it is no longer maintained.
* Add ObjectLibrary::AddFactory and ObjectLibrary::PatternEntry classes. This method and associated class are the preferred mechanism for registering factories with the ObjectLibrary going forward. The ObjectLibrary::Register method, which uses regular expressions and may be problematic, is deprecated and will be removed in a future release.
* Changed `BlockBasedTableOptions::block_size` from `size_t` to `uint64_t`.
* Added API warning against using `Iterator::Refresh()` together with `DB::DeleteRange()`, which are incompatible and have always risked causing the refreshed iterator to return incorrect results.
* Made `AdvancedColumnFamilyOptions.bottommost_temperature` dynamically changeable with `SetOptions()`.
### Behavior Changes
* `DB::DestroyColumnFamilyHandle()` will return Status::InvalidArgument() if called with `DB::DefaultColumnFamily()`.
* On 32-bit platforms, mmap reads are no longer quietly disabled, just discouraged.
### New Features
* Added `Options::DisableExtraChecks()` that can be used to improve peak write performance by disabling checks that should not be necessary in the absence of software logic errors or CPU+memory hardware errors. (Default options are slowly moving toward some performance overheads for extra correctness checking.)
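A minimal sketch of the new call, trading the extra checks for peak write performance per the entry above:

```cpp
#include "rocksdb/options.h"

// Minimal sketch: disable checks that are only needed to catch software
// logic errors or CPU/memory hardware errors.
ROCKSDB_NAMESPACE::Options MakeFastWriteOptions() {
  ROCKSDB_NAMESPACE::Options options;
  options.DisableExtraChecks();
  return options;
}
```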
### Performance Improvements
* Improved read performance when a prefix extractor is used (Seek, Get, MultiGet), even compared to version 6.25 baseline (see bug fix below), by optimizing the common case of prefix extractor compatible with table file and unchanging.
### Bug Fixes
* Fix a bug that FlushMemTable may return OK even if the flush did not succeed.
* Fixed a bug of Sync() and Fsync() not using `fcntl(F_FULLFSYNC)` on OS X and iOS.
* Fixed a significant performance regression in version 6.26 when a prefix extractor is used on the read path (Seek, Get, MultiGet). (Excessive time was spent in SliceTransform::AsString().)
* Fixed a race condition in SstFileManagerImpl error recovery code that can cause a crash during process shutdown.
### New Features
* Added RocksJava support for MacOS universal binary (ARM+x86)
## 6.28.0 (2021-12-17)
### New Features
* Introduced 'CommitWithTimestamp' as a new tag. Currently, there is no API for the user to trigger a write with this tag to the WAL. This is part of the efforts to support write-committed transactions with user-defined timestamps.
* Introduce SimulatedHybridFileSystem which can help simulating HDD latency in db_bench. Tiered Storage latency simulation can be enabled using -simulate_hybrid_fs_file (note that it doesn't work if db_bench is interrupted in the middle). -simulate_hdd can also be used to simulate all files on HDD.
### Bug Fixes
* Fixed a bug in RocksDB's automatic implicit prefetching, which was broken by the new adaptive_readahead feature: internal prefetching got disabled when an iterator moved from one file to the next.
* Fixed a bug in TableOptions.prepopulate_block_cache which caused a segmentation fault when used with TableOptions.partition_filters = true and TableOptions.cache_index_and_filter_blocks = true.
* Fixed a bug affecting custom memtable factories which are not registered with the `ObjectRegistry`. The bug could result in failure to save the OPTIONS file.
* Fixed a bug causing two duplicate entries to be appended to a file opened in non-direct mode and tracked by `FaultInjectionTestFS`.
* Fixed a bug in TableOptions.prepopulate_block_cache so that it also supports block-based filters.
* Block cache keys no longer use `FSRandomAccessFile::GetUniqueId()` (previously used when available), so a filesystem recycling unique ids can no longer lead to incorrect result or crash (#7405). For files generated by RocksDB >= 6.24, the cache keys are stable across DB::Open and DB directory move / copy / import / export / migration, etc. Although collisions are still theoretically possible, they are (a) impossible in many common cases, (b) not dependent on environmental factors, and (c) much less likely than a CPU miscalculation while executing RocksDB.
* Fixed a bug in C bindings causing iterator to return incorrect result (#9343).
### Behavior Changes
* MemTableList::TrimHistory now uses allocated bytes when max_write_buffer_size_to_maintain > 0 (default in TransactionDB, introduced in PR #5022). Fixes #8371.
### Public API change
* Extend WriteBatch::AssignTimestamp and AssignTimestamps API so that both functions can accept an optional `checker` argument that performs additional checking on timestamp sizes.
* Introduce a new EventListener callback that will be called upon the end of automatic error recovery.
* Add IncreaseFullHistoryTsLow API so users can advance each column family's full_history_ts_low separately.
* Add GetFullHistoryTsLow API so users can query the current full_history_ts_low value of a specified column family.
### Performance Improvements
* Replaced map property `TableProperties::properties_offsets` with uint64_t property `external_sst_file_global_seqno_offset` to save memory in table properties.
* Block cache accesses are faster by RocksDB using cache keys of fixed size (16 bytes).
### Java API Changes
* Removed Java API `TableProperties.getPropertiesOffsets()` as it exposed internal details to external users.
## 6.27.0 (2021-11-19)
### New Features
* Added new ChecksumType kXXH3 which is faster than kCRC32c on almost all x86\_64 hardware.
* Added a new online consistency check for BlobDB which validates that the number/total size of garbage blobs does not exceed the number/total size of all blobs in any given blob file.
* Provided support for tracking per-sst user-defined timestamp information in MANIFEST.
* Added new option "adaptive_readahead" in ReadOptions. For iterators, RocksDB does auto-readahead on noticing sequential reads and by enabling this option, readahead_size of current file (if reads are sequential) will be carried forward to next file instead of starting from the scratch at each level (except L0 level files). If reads are not sequential it will fall back to 8KB. This option is applicable only for RocksDB internal prefetch buffer and isn't supported with underlying file system prefetching.
* Added the read count and read bytes related stats to Statistics for tiered storage hot, warm, and cold file reads.
* Added an option to dynamically charge an updating estimated memory usage of block-based table building to block cache if block cache available. It currently only includes charging memory usage of constructing (new) Bloom Filter and Ribbon Filter to block cache. To enable this feature, set `BlockBasedTableOptions::reserve_table_builder_memory = true`.
* Add a new API OnIOError in listener.h that notifies listeners when an IO error occurs during FileSystem operation along with filename, status etc.
* Added compaction readahead support for blob files to the integrated BlobDB implementation, which can improve compaction performance when the database resides on higher-latency storage like HDDs or remote filesystems. Readahead can be configured using the column family option `blob_compaction_readahead_size`.
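A minimal sketch of enabling blob compaction readahead per the entry above (sizes are illustrative):

```cpp
#include "rocksdb/options.h"

// Minimal sketch: integrated BlobDB with readahead during compaction,
// useful on higher-latency storage like HDDs or remote filesystems.
ROCKSDB_NAMESPACE::Options MakeBlobOptions() {
  ROCKSDB_NAMESPACE::Options options;
  options.enable_blob_files = true;
  options.min_blob_size = 4 << 10;                   // 4KB
  options.blob_compaction_readahead_size = 2 << 20;  // 2MB readahead
  return options;
}
```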
### Bug Fixes
* Prevent a `CompactRange()` with `CompactRangeOptions::change_level == true` from possibly causing corruption to the LSM state (overlapping files within a level) when run in parallel with another manual compaction. Note that setting `force_consistency_checks == true` (the default) would cause the DB to enter read-only mode in this scenario and return `Status::Corruption`, rather than committing any corruption.
* Fixed a bug in CompactionIterator when write-prepared transaction is used. A released earliest write conflict snapshot may cause assertion failure in dbg mode and unexpected key in opt mode.
* Fix ticker WRITE_WITH_WAL("rocksdb.write.wal"); this bug was caused by a bad extra `RecordTick(stats_, WRITE_WITH_WAL)` (in 2 places). This fix removes the extra `RecordTick`s and fixes the corresponding test case.
* EventListener::OnTableFileCreated was previously called with OK status and file_size==0 in cases of no SST file contents written (because there was no content to add) and the empty file deleted before calling the listener. Now the status is Aborted.
* Fixed a bug in CompactionIterator when a write-prepared transaction is used. Releasing earliest_snapshot during compaction may cause a SingleDelete to be output after a PUT of the same user key whose seq has been zeroed.
* Added input sanitization on negative bytes passed into `GenericRateLimiter::Request`.
* Fixed an assertion failure in CompactionIterator when write-prepared transaction is used. We prove that certain operations can lead to a Delete being followed by a SingleDelete (same user key). We can drop the SingleDelete.
* Fixed a bug of timestamp-based GC which can cause all versions of a key under full_history_ts_low to be dropped. This bug will be triggered when some of the ikeys' timestamps are lower than full_history_ts_low, while others are newer.
* In some cases outside of the DB read and compaction paths, SST block checksums are now checked where they were not before.
* Explicitly check for and disallow `BlockBasedTableOptions` configurations where an insertion into one of {`block_cache`, `block_cache_compressed`, `persistent_cache`} can show up in another of these. (RocksDB expects to be able to use the same key for different physical data among tiers.)
* Users who configured a dedicated thread pool for bottommost compactions by explicitly adding threads to the `Env::Priority::BOTTOM` pool will no longer see RocksDB schedule automatic compactions exceeding the DB's compaction concurrency limit. For details on per-DB compaction concurrency limit, see API docs of `max_background_compactions` and `max_background_jobs`.
* Fixed a bug of background flush thread picking more memtables to flush and prematurely advancing column family's log_number.
* Fixed an assertion failure in ManifestTailer.
* Fixed a bug that could, with WAL enabled, cause backups, checkpoints, and `GetSortedWalFiles()` to fail randomly with an error like `IO error: 001234.log: No such file or directory`
### Behavior Changes
* `NUM_FILES_IN_SINGLE_COMPACTION` was only counting the first input level files, now it's including all input files.
* `TransactionUtil::CheckKeyForConflicts` can also perform conflict-checking based on user-defined timestamps in addition to sequence numbers.
* Removed `GenericRateLimiter`'s minimum refill bytes per period previously enforced.
### Public API change
* When options.ttl is used with leveled compaction with compaction priority kMinOverlappingRatio, files exceeding half of the TTL value will be prioritized more, so that by the time TTL is reached, fewer extra compactions will be scheduled to clear them up. At the same time, when compacting files with data older than half of TTL, output files may be cut off based on those files' boundaries, in order for the early TTL compaction to work properly. (See the sketch after this list.)
* Made FileSystem and RateLimiter extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
* Clarified in API comments that RocksDB is not exception safe for callbacks and custom extensions. An exception propagating into RocksDB can lead to undefined behavior, including data loss, unreported corruption, deadlocks, and more.
* Marked `WriteBufferManager` as `final` because it is not intended for extension.
* Removed unimportant implementation details from table_properties.h
* Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like the directory fsync reason in `DirFsyncOptions`. File systems like btrfs use that to skip directory fsync when creating a new file, or, when renaming a file, to fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%.
* `DB::Open()` is no longer blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true.
* In builds where glibc provides `gettid()`, info log ("LOG" file) lines now print a system-wide thread ID from `gettid()` instead of the process-local `pthread_self()`. For all users, the thread ID format is changed from hexadecimal to decimal integer.
* In builds where glibc provides `pthread_setname_np()`, the background thread names no longer contain an ID suffix. For example, "rocksdb:bottom7" (and all other threads in the `Env::Priority::BOTTOM` pool) are now named "rocksdb:bottom". Previously large thread pools could breach the name size limit (e.g., naming "rocksdb:bottom10" would fail).
* Deprecating `ReadOptions::iter_start_seqnum` and `DBOptions::preserve_deletes`; please try the user-defined timestamp feature instead. The options will be removed in a future release; currently a warning message is logged when they are used.
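A minimal sketch of the TTL setup referenced in the options.ttl entry above (values are illustrative):

```cpp
#include "rocksdb/advanced_options.h"
#include "rocksdb/options.h"

// Minimal sketch: leveled compaction with kMinOverlappingRatio priority
// and a 30-day TTL, so aging files are compacted away gradually.
ROCKSDB_NAMESPACE::Options MakeTtlOptions() {
  ROCKSDB_NAMESPACE::Options options;
  options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleLevel;
  options.compaction_pri = ROCKSDB_NAMESPACE::kMinOverlappingRatio;
  options.ttl = 30 * 24 * 60 * 60;  // 30 days, in seconds
  return options;
}
```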
### Performance Improvements
* Released some memory related to filter construction earlier in `BlockBasedTableBuilder` for `FullFilter` and `PartitionedFilter` case (#9070)
## 6.26.0 (2021-10-20)
### Bug Fixes
* Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets.
* Fix the incorrect disabling of SST rate limited deletion when the WAL and DB are in different directories. Only WAL rate limited deletion should be disabled if it's in a different directory.
* Fix `DisableManualCompaction()` to cancel compactions even when they are waiting on automatic compactions to drain due to `CompactRangeOptions::exclusive_manual_compactions == true`.
* Fix contract of `Env::ReopenWritableFile()` and `FileSystem::ReopenWritableFile()` to specify any existing file must not be deleted or truncated.
* Fixed bug in calls to `IngestExternalFiles()` with files for multiple column families. The bug could have introduced a delay in ingested file keys becoming visible after `IngestExternalFiles()` returned. Furthermore, mutations to ingested file keys while they were invisible could have been dropped (not necessarily immediately).
* Fixed a possible race condition impacting users of `WriteBufferManager` who constructed it with `allow_stall == true`. The race condition led to undefined behavior (in our experience, typically a process crash).
* Fixed a bug where stalled writes would remain stalled forever after the user calls `WriteBufferManager::SetBufferSize()` with `new_size == 0` to dynamically disable memory limiting.
* Make `DB::close()` thread-safe.
* Fix a bug in atomic flush where one bg flush thread will wait forever for a preceding bg flush thread to commit its result to MANIFEST but encounters an error which is mapped to a soft error (DB not stopped).
* Fix a bug in `BackupEngine` where some internal callers of `GenericRateLimiter::Request()` do not honor `bytes <= GetSingleBurstBytes()`.
### New Features
* Print information about blob files when using "ldb list_live_files_metadata"
* Provided support for SingleDelete with user defined timestamp.
* Experimental new function DB::GetLiveFilesStorageInfo offers essentially a unified version of other functions like GetLiveFiles, GetLiveFilesChecksumInfo, and GetSortedWalFiles. Checkpoints and backups could show small behavioral changes and/or improved performance as they now use this new API.
* Add remote compaction read/write bytes statistics: `REMOTE_COMPACT_READ_BYTES`, `REMOTE_COMPACT_WRITE_BYTES`.
* Introduce an experimental feature to dump out the blocks from the block cache and insert them into the secondary cache to reduce the cache warmup time (e.g., used while migrating a DB instance). More information is in `class CacheDumper` and `CacheDumpedLoader` at `rocksdb/utilities/cache_dump_load.h`. Note that this feature is subject to change in the future; it is still experimental.
* Introduced a new BlobDB configuration option `blob_garbage_collection_force_threshold`, which can be used to trigger compactions targeting the SST files which reference the oldest blob files when the ratio of garbage in those blob files meets or exceeds the specified threshold. This can reduce space amplification with skewed workloads where the affected SST files might not otherwise get picked up for compaction.
* Added EXPERIMENTAL support for table file (SST) unique identifiers that are stable and universally unique, available with new function `GetUniqueIdFromTableProperties`. Only SST files from RocksDB >= 6.24 support unique IDs.
* Added `GetMapProperty()` support for "rocksdb.dbstats" (`DB::Properties::kDBStats`). As a map property, it includes DB-level internal stats accumulated over the DB's lifetime, such as user write related stats and uptime.
### Public API change
* Made SystemClock extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
* Made SliceTransform extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. The Capped and Prefixed transform classes return a short name (no length); use GetId for the fully qualified name.
* Made FileChecksumGenFactory, SstPartitionerFactory, TablePropertiesCollectorFactory, and WalFilter extend the Customizable class and added a CreateFromString method.
* Some fields of SstFileMetaData are deprecated for compatibility with new base class FileStorageInfo.
* Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of this batch of files.
* If `DB::Close()` failed with a non aborted status, calling `DB::Close()` again will return the original status instead of Status::OK.
* Add CacheTier to advanced_options.h to describe the cache tier we used. Add a `lowest_used_cache_tier` option to `DBOptions` (immutable) and pass it to BlockBasedTableReader. By default it is `CacheTier::kNonVolatileBlockTier`, which means we always use both block cache (kVolatileTier) and secondary cache (kNonVolatileBlockTier). By setting it to `CacheTier::kVolatileTier`, the DB will not use the secondary cache. (See the sketch after this list.)
* Even when options.max_compaction_bytes is hit, compaction output files are only cut when they align with grandparent files' boundaries. options.max_compaction_bytes could be slightly violated with the change, but the violation is no more than one target SST file size, which is usually much smaller.
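A minimal sketch of the cache-tier restriction referenced above, using the option and enum names from that entry:

```cpp
#include "rocksdb/advanced_options.h"
#include "rocksdb/options.h"

// Minimal sketch: keep the DB on the volatile (block cache) tier only,
// so a configured secondary cache is not used.
ROCKSDB_NAMESPACE::Options MakeVolatileTierOnlyOptions() {
  ROCKSDB_NAMESPACE::Options options;
  options.lowest_used_cache_tier =
      ROCKSDB_NAMESPACE::CacheTier::kVolatileTier;
  return options;
}
```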
### Performance Improvements
* Improved CPU efficiency of building block-based table (SST) files (#9039 and #9040).
### Java API Changes
* Add Java API bindings for new integrated BlobDB options
* `keyMayExist()` supports ByteBuffer.
* Fix multiget throwing a NullPointerException for num of keys > 70k (https://github.com/facebook/rocksdb/issues/8039).
## 6.25.0 (2021-09-20)
### Bug Fixes
* Allow secondary instance to refresh iterator. Assign read seq after referencing SuperVersion.
* Fixed a bug of the secondary instance's last_sequence going backward, causing reads on the secondary to fail to see recent updates from the primary.
* Fixed a bug that could lead to duplicate DB ID or DB session ID in POSIX environments without /proc/sys/kernel/random/uuid.
* Fix a race in DumpStats() with column family destruction due to not taking a Ref on each entry while iterating the ColumnFamilySet.
* Fix a race in item ref counting in LRUCache when promoting an item from the SecondaryCache.
* Fix a race in BackupEngine if RateLimiter is reconfigured during concurrent Restore operations.
* Fix a bug on POSIX in which failure to create a lock file (e.g. out of space) can prevent future LockFile attempts in the same process on the same file from succeeding.
* Fix a bug that backup_rate_limiter and restore_rate_limiter in BackupEngine could not limit read rates.
* Fix the implementation of `prepopulate_block_cache = kFlushOnly` to only apply to flushes rather than to all generated files.
* Fix WAL log data corruption when using DBOptions.manual_wal_flush(true) and WriteOptions.sync(true) together. The sync WAL should work with locked log_write_mutex_.
* Add checks for validity of the IO uring completion queue entries, and fail the BlockBasedTableReader MultiGet sub-batch if there's an invalid completion.
* Add an interface RocksDbIOUringEnable() that, if defined by the user, will allow them to enable/disable the use of IO uring by RocksDB.
* Fix the bug that when direct I/O is used and MultiRead() returns a short result, RandomAccessFileReader::MultiRead() still returned a full-size buffer, with the returned short value together with some data in the original buffer. This bug is unlikely to cause incorrect results, because (1) since the FileSystem layer is expected to retry on short results, returning a short result is only possible when asking for more bytes at the end of the file, which RocksDB doesn't do when using MultiRead(); and (2) the checksum is unlikely to match.
### New Features
* RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions.
* Added a ticker statistic, "rocksdb.verify_checksum.read.bytes", reporting how many bytes were read from file to serve `VerifyChecksum()` and `VerifyFileChecksums()` queries.
* Added ticker statistics, "rocksdb.backup.read.bytes" and "rocksdb.backup.write.bytes", reporting how many bytes were read and written during backup.
* Added properties for BlobDB: `rocksdb.num-blob-files`, `rocksdb.blob-stats`, `rocksdb.total-blob-file-size`, and `rocksdb.live-blob-file-size`. The existing property `rocksdb.estimate-live-data-size` was also extended to include live bytes residing in blob files.
* Added two new RateLimiter IOPriorities: `Env::IO_USER`,`Env::IO_MID`. `Env::IO_USER` will have superior priority over all other RateLimiter IOPriorities without being subject to fair scheduling constraint.
* `SstFileWriter` now supports `Put`s and `Delete`s with user-defined timestamps. Note that the ingestion logic itself is not timestamp-aware yet.
* Allow a single write batch to include keys from multiple column families whose timestamps' formats can differ. For example, some column families may disable timestamp, while others enable timestamp.
* Add compaction priority information in RemoteCompaction, which can be used to schedule high priority job first.
* Added new callback APIs `OnBlobFileCreationStarted`, `OnBlobFileCreated` and `OnBlobFileDeleted` in the `EventListener` class of listener.h. They notify listeners during creation/deletion of individual blob files in Integrated BlobDB, and log blob file creation finished and deletion events in the LOG file.
* Batch blob read requests for `DB::MultiGet` using `MultiRead`.
* Add support for fallback to local compaction, the user can return `CompactionServiceJobStatus::kUseLocal` to instruct RocksDB to run the compaction locally instead of waiting for the remote compaction result.
* Add built-in rate limiter's implementation of `RateLimiter::GetTotalPendingRequest(int64_t* total_pending_requests, const Env::IOPriority pri)` for the total number of requests that are pending for bytes in the rate limiter.
* Charge memory usage during data buffering, from which training samples are gathered for dictionary compression, to block cache. Unbuffering data can now be triggered if the block cache becomes full and `strict_capacity_limit=true` for the block cache, in addition to existing conditions that can trigger unbuffering.
### Public API change
* Remove obsolete implementation details FullKey and ParseFullKey from public API
* Change `SstFileMetaData::size` from `size_t` to `uint64_t`.
* Made Statistics extend the Customizable class and added a CreateFromString method. Implementations of Statistics need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method.
* Extended `FlushJobInfo` and `CompactionJobInfo` in listener.h to provide information about the blob files generated by a flush/compaction and garbage collected during compaction in Integrated BlobDB. Added struct members `blob_file_addition_infos` and `blob_file_garbage_infos` that contain this information.
* Extended parameter `output_file_names` of `CompactFiles` API to also include paths of the blob files generated by the compaction in Integrated BlobDB.
* Most `BackupEngine` functions now return `IOStatus` instead of `Status`. Most existing code should be compatible with this change but some calls might need to be updated.
* Add a new field `level_at_creation` in `TablePropertiesCollectorFactory::Context` to capture the level at creating the SST file (i.e, table), of which the properties are being collected.
### Miscellaneous
* Add a paranoid check: in case the FileSystem layer doesn't fill the buffer but returns success, the checksum is unlikely to match even if the buffer contains a previous block. The modified byte is not useful anyway, so this isn't expected to change any behavior when the FileSystem is satisfying its contract.
## 6.24.1 (2021-08-31)
### Bug Fixes
* Fix a race in item ref counting in LRUCache when promoting an item from the SecondaryCache.
## 6.24.0 (2021-08-20)
### Bug Fixes
@@ -387,7 +17,7 @@ Note: The next release will be major release 7.0. See https://github.com/faceboo
* Fixed a bug affecting the batched `MultiGet` API when used with keys spanning multiple column families and `sorted_input == false`.
* Fixed a potential incorrect result in opt mode and assertion failures caused by releasing snapshot(s) during compaction.
* Fixed passing of BlobFileCompletionCallback to the Compaction job and Atomic flush job, where it was a default parameter (nullptr). BlobFileCompletionCallback is an internal callback that manages the addition of blob files to SSTFileManager.
* Fixed MultiGet not updating the block_read_count and block_read_byte PerfContext counters.
* Fixed MultiGet not updating the block_read_count and block_read_byte PerfContext counters
### New Features
* Made the EventListener extend the Customizable class.
@@ -398,8 +28,8 @@ Note: The next release will be major release 7.0. See https://github.com/faceboo
* Add a comment to suggest btrfs user to disable file preallocation by setting `options.allow_fallocate=false`.
* The fast-forward option in trace replay was changed to a double type to allow replaying at a lower speed by setting the value between 0 and 1. This option can be set via `ReplayOptions` in `Replayer::Replay()`, or via `--trace_replay_fast_forward` in db_bench.
* Add property `LiveSstFilesSizeAtTemperature` to retrieve the SST file size at different temperatures.
* Added a stat rocksdb.secondary.cache.hits.
* Added a PerfContext counter secondary_cache_hit_count.
* Added a stat rocksdb.secondary.cache.hits
* Added a PerfContext counter secondary_cache_hit_count
* The integrated BlobDB implementation now supports the tickers `BLOB_DB_BLOB_FILE_BYTES_READ`, `BLOB_DB_GC_NUM_KEYS_RELOCATED`, and `BLOB_DB_GC_BYTES_RELOCATED`, as well as the histograms `BLOB_DB_COMPRESSION_MICROS` and `BLOB_DB_DECOMPRESSION_MICROS`.
* Added a hybrid configuration of Ribbon and Bloom filters where some LSM levels use Ribbon for memory space efficiency and some use Bloom for speed. See NewRibbonFilterPolicy. This also changes the default behavior of NewRibbonFilterPolicy to use Bloom for flushes under Leveled and Universal compaction and Ribbon otherwise. The C API function `rocksdb_filterpolicy_create_ribbon` is unchanged, and a new `rocksdb_filterpolicy_create_ribbon_hybrid` is added. A configuration sketch follows this list.
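A hedged configuration sketch for the hybrid policy; the parameter names `bloom_equivalent_bits_per_key` and `bloom_before_level` are assumptions drawn from the `NewRibbonFilterPolicy` declaration, not from this note.

```cpp
#include <memory>

#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

// Builds Options where levels 0 and 1 use Bloom filters (faster to build
// and query) and level 2 onward uses Ribbon filters (more space-efficient),
// both at roughly 10 bits-per-key Bloom-equivalent accuracy.
rocksdb::Options MakeHybridFilterOptions() {
  rocksdb::BlockBasedTableOptions table_opts;
  table_opts.filter_policy.reset(
      rocksdb::NewRibbonFilterPolicy(10.0, /*bloom_before_level=*/2));
  rocksdb::Options options;
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_opts));
  return options;
}
```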
@ -1721,7 +1351,7 @@ if set to something > 0 user will see 2 changes in iterators behavior 1) only ke
* Added a new way to report QPS from db_bench (check out --report_file and --report_interval_seconds)
* Added a cache for individual rows. See DBOptions::row_cache for more info.
* Several new features on EventListener (see include/rocksdb/listener.h):
- OnCompactionCompleted() now returns per-compaction job statistics, defined in include/rocksdb/compaction_job_stats.h (see the listener sketch after this list).
- OnCompationCompleted() now returns per-compaction job statistics, defined in include/rocksdb/compaction_job_stats.h.
- Added OnTableFileCreated() and OnTableFileDeleted().
* Add compaction_options_universal.enable_trivial_move; when set to true, it allows trivial moves while performing universal compaction. A trivial move happens only when all the input files are non-overlapping.
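As a sketch of the listener hooks above (the member names on `CompactionJobInfo` and `CompactionJobStats` are assumptions based on include/rocksdb/compaction_job_stats.h and listener.h):

```cpp
#include <cstdio>

#include "rocksdb/listener.h"

// Logs per-compaction statistics and table-file creations; attach it via
// Options::listeners before opening the DB.
class CompactionStatsLogger : public rocksdb::EventListener {
 public:
  void OnCompactionCompleted(rocksdb::DB* /*db*/,
                             const rocksdb::CompactionJobInfo& info) override {
    // info.stats is the per-compaction CompactionJobStats mentioned above.
    std::printf("compaction finished: %llu us, %zu input -> %zu output files\n",
                static_cast<unsigned long long>(info.stats.elapsed_micros),
                info.stats.num_input_files, info.stats.num_output_files);
  }
  void OnTableFileCreated(const rocksdb::TableFileCreationInfo& info) override {
    std::printf("table file created: %s\n", info.file_path.c_str());
  }
};
```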


@ -6,7 +6,7 @@ than release mode.
RocksDB's library should be able to compile without any dependency installed,
although we recommend installing some compression libraries (see below).
We do depend on newer gcc/clang with C++17 support (GCC >= 7, Clang >= 5).
We do depend on newer gcc/clang with C++11 support.
There are a few options when compiling RocksDB:
@ -47,12 +47,10 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
* If you wish to build the RocksJava static target, then cmake is required for building Snappy.
* If you wish to run microbench (e.g, `make microbench`, `make ribbon_bench` or `cmake -DWITH_BENCHMARK=1`), Google benchmark >= 1.6.0 is needed.
## Supported platforms
* **Linux - Ubuntu**
* Upgrade your gcc to version at least 7 to get C++17 support.
* Upgrade your gcc to version at least 4.8 to get C++11 support.
* Install gflags. First, try: `sudo apt-get install libgflags-dev`
If this doesn't work and you're using Ubuntu, here's a nice tutorial:
(http://askubuntu.com/questions/312173/installing-gflags-12-04)
@ -64,7 +62,8 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
* Install zstandard: `sudo apt-get install libzstd-dev`.
* **Linux - CentOS / RHEL**
* Upgrade your gcc to version at least 7 to get C++17 support
* Upgrade your gcc to version at least 4.8 to get C++11 support:
`yum install gcc48-c++`
* Install gflags:
git clone https://github.com/gflags/gflags.git
@ -114,11 +113,11 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
make && sudo make install
* **OS X**:
* Install latest C++ compiler that supports C++ 17:
* Install latest C++ compiler that supports C++ 11:
* Update XCode: run `xcode-select --install` (or install it from the XCode app's settings).
* Install via [homebrew](http://brew.sh/).
* If you're a first-time developer on macOS, you still need to run: `xcode-select --install` in your command line.
* run `brew tap homebrew/versions; brew install gcc7 --use-llvm` to install gcc 7 (or higher).
* run `brew tap homebrew/versions; brew install gcc48 --use-llvm` to install gcc 4.8 (or higher).
* run `brew install rocksdb`
* **FreeBSD** (11.01):
@ -180,7 +179,8 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
* **iOS**:
* Run: `TARGET_OS=IOS make static_lib`. When building the project which uses rocksdb iOS library, make sure to define two important pre-processing macros: `ROCKSDB_LITE` and `IOS_CROSS_COMPILE`.
* **Windows** (Visual Studio 2017 and up):
* **Windows**:
* For building with MS Visual Studio 13 you will need Update 4 installed.
* Read and follow the instructions at CMakeLists.txt
* Or install via [vcpkg](https://github.com/microsoft/vcpkg)
* run `vcpkg install rocksdb:x64-windows`
@ -201,7 +201,7 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
export PATH=/opt/freeware/bin:$PATH
* **Solaris Sparc**
* Install GCC 7 and higher.
* Install GCC 4.8.2 and higher.
* Use these environment variables:
export CC=gcc
@ -210,3 +210,4 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
export EXTRA_LDFLAGS=-m64
export PORTABLE=1
export PLATFORM_LDFLAGS="-static-libstdc++ -static-libgcc"


@ -1,6 +1,6 @@
This is the list of all known third-party language bindings for RocksDB. If something is missing, please open a pull request to add it.
* Java - https://github.com/facebook/rocksdb/tree/main/java
* Java - https://github.com/facebook/rocksdb/tree/master/java
* Python
* http://python-rocksdb.readthedocs.io/en/latest/
* http://pyrocksdb.readthedocs.org/en/latest/ (unmaintained)

Makefile (570 changed lines)

File diff suppressed because it is too large.


@ -1,7 +1,4 @@
This is the list of all known third-party plugins for RocksDB. If something is missing, please open a pull request to add it.
* [Dedupfs](https://github.com/ajkr/dedupfs): an example for plugin developers to reference
* [HDFS](https://github.com/riversand963/rocksdb-hdfs-env): an Env used for interacting with HDFS. Migrated from main RocksDB repo
* [ZenFS](https://github.com/westerndigitalcorporation/zenfs): a file system for zoned block devices
* [RADOS](https://github.com/riversand963/rocksdb-rados-env): an Env used for interacting with RADOS. Migrated from RocksDB main repo.
* [PMEM](https://github.com/pmem/pmem-rocksdb-plugin): a collection of plugins to enable Persistent Memory on RocksDB.


@ -1,8 +1,8 @@
## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage
[![CircleCI Status](https://circleci.com/gh/facebook/rocksdb.svg?style=svg)](https://circleci.com/gh/facebook/rocksdb)
[![TravisCI Status](https://api.travis-ci.com/facebook/rocksdb.svg?branch=main)](https://travis-ci.com/github/facebook/rocksdb)
[![Appveyor Build status](https://ci.appveyor.com/api/projects/status/fbgfu0so3afcno78/branch/main?svg=true)](https://ci.appveyor.com/project/Facebook/rocksdb/branch/main)
[![TravisCI Status](https://travis-ci.org/facebook/rocksdb.svg?branch=master)](https://travis-ci.org/facebook/rocksdb)
[![Appveyor Build status](https://ci.appveyor.com/api/projects/status/fbgfu0so3afcno78/branch/master?svg=true)](https://ci.appveyor.com/project/Facebook/rocksdb/branch/master)
[![PPC64le Build Status](http://140-211-168-68-openstack.osuosl.org:8080/buildStatus/icon?job=rocksdb&style=plastic)](http://140-211-168-68-openstack.osuosl.org:8080/job/rocksdb)
RocksDB is developed and maintained by Facebook Database Engineering Team.
@ -17,7 +17,7 @@ and Space-Amplification-Factor (SAF). It has multi-threaded compactions,
making it especially suitable for storing multiple terabytes of data in a
single database.
Start with example usage here: https://github.com/facebook/rocksdb/tree/main/examples
Start with example usage here: https://github.com/facebook/rocksdb/tree/master/examples
See the [github wiki](https://github.com/facebook/rocksdb/wiki) for more explanation.
@ -25,7 +25,7 @@ The public interface is in `include/`. Callers should not include or
rely on the details of any other header files in this package. Those
internal APIs may be changed without warning.
Questions and discussions are welcome on the [RocksDB Developers Public](https://www.facebook.com/groups/rocksdb.dev/) Facebook group and [email list](https://groups.google.com/g/rocksdb) on Google Groups.
Design discussions are conducted in https://www.facebook.com/groups/rocksdb.dev/ and https://rocksdb.slack.com/
## License


@ -4,7 +4,7 @@ RocksDBLite is a project focused on mobile use cases, which don't need a lot of
Some examples of the features disabled by ROCKSDB_LITE:
* compiled-in support for LDB tool
* No backup engine
* No backupable DB
* No support for replication (which we provide in the form of TransactionalIterator)
* No advanced monitoring tools
* No special-purpose memtables that are highly optimized for specific use cases

TARGETS (6771 changed lines)

File diff suppressed because it is too large.


@ -113,16 +113,5 @@ LzLabs is using RocksDB as a storage engine in their multi-database distributed
[YugabyteDB](https://www.yugabyte.com/) is an open source, high performance, distributed SQL database that uses RocksDB as its storage layer. For more information, please see https://github.com/yugabyte/yugabyte-db/.
## ArangoDB
[ArangoDB](https://www.arangodb.com/) is a native multi-model database with flexible data models for documents, graphs, and key-values, for building high performance applications using a convenient SQL-like query language or JavaScript extensions. It uses RocksDB as its storage engine.
[ArangoDB](https://www.arangodb.com/) is a native multi-model database with flexible data models for documents, graphs, and key-values, for building high performance applications using a convenient SQL-like query language or JavaScript extensions. It uses RocksDB as its sotrage engine.
## Milvus
[Milvus](https://milvus.io/) is an open source vector database for unstructured data. It uses RocksDB not only as one of the supported kv storage engines, but also as a message queue.
## Kafka
[Kafka](https://kafka.apache.org/) is an open-source distributed event streaming platform; it uses RocksDB to store state in Kafka Streams: https://www.confluent.io/blog/how-to-tune-rocksdb-kafka-streams-state-stores-performance/.
## Solana Labs
[Solana](https://github.com/solana-labs/solana) is a fast, secure, scalable, and decentralized blockchain. It uses RocksDB as the underlying storage for its ledger store.
## Others
More databases using RocksDB can be found at [dbdb.io](https://dbdb.io/browse?embeds=rocksdb).


@ -24,7 +24,7 @@ We strive to achieve the following goals:
* make all unit test pass both in debug and release builds.
* Note: the latest introduction of SyncPoint seems to disable running db_test in Release.
* make performance on par with published benchmarks accounting for HW differences
* we would like to keep the port code inline with the main branch with no forking
* we would like to keep the port code inline with the master branch with no forking
## Build system
We have chosen CMake as a widely accepted build system to build the Windows port. It is very fast and convenient.
@ -66,7 +66,7 @@ We endeavored to make it functionally on par with posix_env. This means we repli
Even though Windows provides its own efficient thread-pool implementation we chose to replicate posix logic using `std::thread` primitives. This allows anyone to quickly detect any changes within the posix source code and replicate them within windows env. This has proven to work very well. At the same time for anyone who wishes to replace the built-in thread-pool can do so using RocksDB stackable environments.
For disk access we implemented all of the functionality present within the posix_env which includes memory mapped files, random access, rate-limiter support etc.
The `use_os_buffer` flag on Posix platforms currently denotes disabling read-ahead log via `fadvise` mechanism. Windows does not have `fadvise` system call. What is more, it implements disk cache in a way that differs from Linux greatly. It's not an uncommon practice on Windows to perform un-buffered disk access to gain control of the memory consumption. We think that in our use case this may also be a good configuration option at the expense of disk throughput. To compensate one may increase the configured in-memory cache size instead. Thus we have chosen `use_os_buffer=false` to disable OS disk buffering for `WinWritableFile` and `WinRandomAccessFile`. The OS imposes restrictions on the alignment of the disk offsets, buffers used and the amount of data that is read/written when accessing files in un-buffered mode. When the option is true, the classes behave in a standard way. This allows to perform writes and reads in cases when un-buffered access does not make sense such as WAL and MANIFEST.
The `use_os_buffer` flag on Posix platforms currently denotes disabling read-ahead log via `fadvise` mechanism. Windows does not have `fadvise` system call. What is more, it implements disk cache in a way that differs from Linux greatly. It’s not an uncommon practice on Windows to perform un-buffered disk access to gain control of the memory consumption. We think that in our use case this may also be a good configuration option at the expense of disk throughput. To compensate one may increase the configured in-memory cache size instead. Thus we have chosen `use_os_buffer=false` to disable OS disk buffering for `WinWritableFile` and `WinRandomAccessFile`. The OS imposes restrictions on the alignment of the disk offsets, buffers used and the amount of data that is read/written when accessing files in un-buffered mode. When the option is true, the classes behave in a standard way. This allows to perform writes and reads in cases when un-buffered access does not make sense such as WAL and MANIFEST.
We have replaced `pread/pwrite` with `WriteFile/ReadFile` using an `OVERLAPPED` structure so we can atomically seek to the position of the disk operation while still performing the operation synchronously. Thus we are able to emulate the functionality of `pread/pwrite` reasonably well. The only difference is that the file pointer is not returned to its original position, but that hardly matters given the random nature of access.
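A minimal sketch of that emulation (an illustration of the approach, not the actual `WinRandomAccessFile` code), assuming the handle was opened without `FILE_FLAG_OVERLAPPED` so the call completes synchronously:

```cpp
#include <windows.h>

#include <cstdint>

// pread()-style positioned read: the OVERLAPPED offset tells ReadFile where
// to read, and on a synchronous handle the call blocks until completion.
// Returns bytes read, or -1 on failure.
int64_t PReadWin(HANDLE h, void* buf, DWORD count, uint64_t offset) {
  OVERLAPPED ov = {};
  ov.Offset = static_cast<DWORD>(offset & 0xffffffffu);
  ov.OffsetHigh = static_cast<DWORD>(offset >> 32);
  DWORD bytes_read = 0;
  if (!ReadFile(h, buf, count, &bytes_read, &ov)) {
    return -1;
  }
  // Unlike pread(), the file pointer is not restored afterwards; as noted
  // above, that rarely matters for random access.
  return static_cast<int64_t>(bytes_read);
}
```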

appveyor.yml (new file, 77 lines)

@ -0,0 +1,77 @@
version: 1.0.{build}
image: Visual Studio 2019
environment:
JAVA_HOME: C:\Program Files\Java\jdk1.8.0
THIRDPARTY_HOME: $(APPVEYOR_BUILD_FOLDER)\thirdparty
SNAPPY_HOME: $(THIRDPARTY_HOME)\snappy-1.1.7
SNAPPY_INCLUDE: $(SNAPPY_HOME);$(SNAPPY_HOME)\build
SNAPPY_LIB_DEBUG: $(SNAPPY_HOME)\build\Debug\snappy.lib
SNAPPY_LIB_RELEASE: $(SNAPPY_HOME)\build\Release\snappy.lib
LZ4_HOME: $(THIRDPARTY_HOME)\lz4-1.8.3
LZ4_INCLUDE: $(LZ4_HOME)\lib
LZ4_LIB_DEBUG: $(LZ4_HOME)\visual\VS2010\bin\x64_Debug\liblz4_static.lib
LZ4_LIB_RELEASE: $(LZ4_HOME)\visual\VS2010\bin\x64_Release\liblz4_static.lib
ZSTD_HOME: $(THIRDPARTY_HOME)\zstd-1.4.0
ZSTD_INCLUDE: $(ZSTD_HOME)\lib;$(ZSTD_HOME)\lib\dictBuilder
ZSTD_LIB_DEBUG: $(ZSTD_HOME)\build\VS2010\bin\x64_Debug\libzstd_static.lib
ZSTD_LIB_RELEASE: $(ZSTD_HOME)\build\VS2010\bin\x64_Release\libzstd_static.lib
matrix:
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
CMAKE_GENERATOR: Visual Studio 14 Win64
DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio 14.0\Common7\IDE\devenv.com
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
CMAKE_GENERATOR: Visual Studio 15 Win64
DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\IDE\devenv.com
install:
- md %THIRDPARTY_HOME%
- echo "Building Snappy dependency..."
- cd %THIRDPARTY_HOME%
- curl --fail --silent --show-error --output snappy-1.1.7.zip --location https://github.com/google/snappy/archive/1.1.7.zip
- unzip snappy-1.1.7.zip
- cd snappy-1.1.7
- mkdir build
- cd build
- if DEFINED CMAKE_PLATEFORM_NAME (set "PLATEFORM_OPT=-A %CMAKE_PLATEFORM_NAME%")
- cmake .. -G "%CMAKE_GENERATOR%" %PLATEFORM_OPT%
- msbuild Snappy.sln /p:Configuration=Debug /p:Platform=x64
- msbuild Snappy.sln /p:Configuration=Release /p:Platform=x64
- echo "Building LZ4 dependency..."
- cd %THIRDPARTY_HOME%
- curl --fail --silent --show-error --output lz4-1.8.3.zip --location https://github.com/lz4/lz4/archive/v1.8.3.zip
- unzip lz4-1.8.3.zip
- cd lz4-1.8.3\visual\VS2010
- ps: $CMD="$Env:DEV_ENV"; & $CMD lz4.sln /upgrade
- msbuild lz4.sln /p:Configuration=Debug /p:Platform=x64
- msbuild lz4.sln /p:Configuration=Release /p:Platform=x64
- echo "Building ZStd dependency..."
- cd %THIRDPARTY_HOME%
- curl --fail --silent --show-error --output zstd-1.4.0.zip --location https://github.com/facebook/zstd/archive/v1.4.0.zip
- unzip zstd-1.4.0.zip
- cd zstd-1.4.0\build\VS2010
- ps: $CMD="$Env:DEV_ENV"; & $CMD zstd.sln /upgrade
- msbuild zstd.sln /p:Configuration=Debug /p:Platform=x64
- msbuild zstd.sln /p:Configuration=Release /p:Platform=x64
before_build:
- md %APPVEYOR_BUILD_FOLDER%\build
- cd %APPVEYOR_BUILD_FOLDER%\build
- if DEFINED CMAKE_PLATEFORM_NAME (set "PLATEFORM_OPT=-A %CMAKE_PLATEFORM_NAME%")
- cmake .. -G "%CMAKE_GENERATOR%" %PLATEFORM_OPT% %CMAKE_OPT% -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DLZ4=1 -DZSTD=1 -DXPRESS=1 -DJNI=1 -DWITH_ALL_TESTS=0
- cd ..
build:
project: build\rocksdb.sln
parallel: true
verbosity: normal
test:
test_script:
- ps: build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,env_basic_test -Concurrency 8
on_failure:
- cmd: 7z a build-failed.zip %APPVEYOR_BUILD_FOLDER%\build\ && appveyor PushArtifact build-failed.zip

File diff suppressed because it is too large.

File diff suppressed because it is too large.

buckifier/buckify_rocksdb.py (82 changed lines; file mode changed from executable to normal)

@ -1,4 +1,3 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
@ -144,8 +143,7 @@ def generate_targets(repo_path, deps_map):
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
deps=["//folly/container:f14_hash"])
src_mk["TOOL_LIB_SOURCES"])
# rocksdb_whole_archive_lib
TARGETS.add_library(
"rocksdb_whole_archive_lib",
@ -153,7 +151,7 @@ def generate_targets(repo_path, deps_map):
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
deps=["//folly/container:f14_hash"],
deps=None,
headers=None,
extra_external_deps="",
link_whole=True)
@ -165,8 +163,9 @@ def generate_targets(repo_path, deps_map):
src_mk.get("EXP_LIB_SOURCES", []) +
src_mk.get("ANALYZER_LIB_SOURCES", []),
[":rocksdb_lib"],
extra_test_libs=True
)
extra_external_deps=""" + [
("googletest", None, "gtest"),
]""")
# rocksdb_tools_lib
TARGETS.add_library(
"rocksdb_tools_lib",
@ -185,23 +184,12 @@ def generate_targets(repo_path, deps_map):
src_mk.get("ANALYZER_LIB_SOURCES", [])
+ src_mk.get('STRESS_LIB_SOURCES', [])
+ ["test_util/testutil.cc"])
# db_stress binary
TARGETS.add_binary("db_stress",
["db_stress_tool/db_stress.cc"],
[":rocksdb_stress_lib"])
# bench binaries
for src in src_mk.get("MICROBENCH_SOURCES", []):
name = src.rsplit('/',1)[1].split('.')[0] if '/' in src else src.split('.')[0]
TARGETS.add_binary(
name,
[src],
[],
extra_bench_libs=True
)
print("Extra dependencies:\n{0}".format(json.dumps(deps_map)))
# Dictionary test executable name -> relative source file path
test_source_map = {}
print(src_mk)
# c_test.c is added through TARGETS.add_c_test(). If there
# are more than one .c test file, we need to extend
@ -212,42 +200,6 @@ def generate_targets(repo_path, deps_map):
return False
TARGETS.add_c_test()
try:
with open(f"{repo_path}/buckifier/bench.json") as json_file:
fast_fancy_bench_config_list = json.load(json_file)
for config_dict in fast_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
clean_benchmarks[binary][benchmark] = []
for metric in overloaded_metric_list:
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
TARGETS.add_fancy_bench_config(config_dict['name'], clean_benchmarks, False, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
with open(f"{repo_path}/buckifier/bench-slow.json") as json_file:
slow_fancy_bench_config_list = json.load(json_file)
for config_dict in slow_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
clean_benchmarks[binary][benchmark] = []
for metric in overloaded_metric_list:
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
for config_dict in slow_fancy_bench_config_list:
TARGETS.add_fancy_bench_config(config_dict['name']+"_slow", clean_benchmarks, True, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
# it is better servicelab experiments break
# than rocksdb github ci
except Exception:
pass
TARGETS.add_test_header()
for test_src in src_mk.get("TEST_MAIN_SOURCES", []):
test = test_src.split('.c')[0].strip().split('/')[-1].strip()
test_source_map[test] = test_src
@ -261,21 +213,17 @@ def generate_targets(repo_path, deps_map):
test_target_name = \
test if not target_alias else test + "_" + target_alias
TARGETS.register_test(
test_target_name,
test_src,
test not in non_parallel_tests,
json.dumps(deps['extra_deps']),
json.dumps(deps['extra_compiler_flags']))
if test in _EXPORTED_TEST_LIBS:
test_library = "%s_lib" % test_target_name
TARGETS.add_library(test_library, [test_src], deps=[":rocksdb_test_lib"], extra_test_libs=True)
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [':'+test_library]),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
else:
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [":rocksdb_test_lib"] ),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
TARGETS.add_library(test_library, [test_src], [":rocksdb_test_lib"])
TARGETS.flush_tests()
print(ColorString.info("Generated TARGETS Summary:"))
print(ColorString.info("- %d libs" % TARGETS.total_lib))


@ -10,7 +10,6 @@ except ImportError:
from __builtin__ import object
from __builtin__ import str
import targets_cfg
import pprint
def pretty_list(lst, indent=8):
if lst is None or len(lst) == 0:
@ -41,73 +40,85 @@ class TARGETSBuilder(object):
self.targets_file.close()
def add_library(self, name, srcs, deps=None, headers=None,
extra_external_deps="", link_whole=False,
external_dependencies=None, extra_test_libs=False):
if headers is not None:
extra_external_deps="", link_whole=False):
headers_attr_prefix = ""
if headers is None:
headers_attr_prefix = "auto_"
headers = "AutoHeaders.RECURSIVE_GLOB"
else:
headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.library_template.format(
name=name,
srcs=pretty_list(srcs),
headers_attr_prefix=headers_attr_prefix,
headers=headers,
deps=pretty_list(deps),
extra_external_deps=extra_external_deps,
link_whole=link_whole,
external_dependencies=pretty_list(external_dependencies),
extra_test_libs=extra_test_libs
).encode("utf-8"))
link_whole=link_whole).encode("utf-8"))
self.total_lib = self.total_lib + 1
def add_rocksdb_library(self, name, srcs, headers=None,
external_dependencies=None):
if headers is not None:
def add_rocksdb_library(self, name, srcs, headers=None):
headers_attr_prefix = ""
if headers is None:
headers_attr_prefix = "auto_"
headers = "AutoHeaders.RECURSIVE_GLOB"
else:
headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.rocksdb_library_template.format(
name=name,
srcs=pretty_list(srcs),
headers=headers,
external_dependencies=pretty_list(external_dependencies)
).encode("utf-8")
)
headers_attr_prefix=headers_attr_prefix,
headers=headers).encode("utf-8"))
self.total_lib = self.total_lib + 1
def add_binary(self, name, srcs, deps=None, extra_preprocessor_flags=None,extra_bench_libs=False):
def add_binary(self, name, srcs, deps=None):
self.targets_file.write(targets_cfg.binary_template.format(
name=name,
srcs=pretty_list(srcs),
deps=pretty_list(deps),
extra_preprocessor_flags=pretty_list(extra_preprocessor_flags),
extra_bench_libs=extra_bench_libs,
).encode("utf-8"))
deps=pretty_list(deps)).encode("utf-8"))
self.total_bin = self.total_bin + 1
def add_c_test(self):
self.targets_file.write(b"""
add_c_test_wrapper()
""")
cpp_binary(
name = "c_test_bin",
srcs = ["db/c_test.c"],
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS,
compiler_flags = ROCKSDB_COMPILER_FLAGS,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
include_paths = ROCKSDB_INCLUDE_PATHS,
deps = [":rocksdb_test_lib"],
) if not is_opt_mode else None
def add_test_header(self):
self.targets_file.write(b"""
# Generate a test rule for each entry in ROCKS_TESTS
# Do not build the tests in opt mode, since SyncPoint and other test code
# will not be included.
custom_unittest(
name = "c_test",
command = [
native.package_name() + "/buckifier/rocks_test_runner.sh",
"$(location :{})".format("c_test_bin"),
],
type = "simple",
) if not is_opt_mode else None
""")
def add_fancy_bench_config(self, name, bench_config, slow, expected_runtime, sl_iterations, regression_threshold):
self.targets_file.write(targets_cfg.fancy_bench_template.format(
name=name,
bench_config=pprint.pformat(bench_config),
slow=slow,
expected_runtime=expected_runtime,
sl_iterations=sl_iterations,
regression_threshold=regression_threshold
).encode("utf-8"))
def register_test(self,
test_name,
src,
deps,
is_parallel,
extra_deps,
extra_compiler_flags):
exec_mode = "serial"
if is_parallel:
exec_mode = "parallel"
self.tests_cfg += targets_cfg.test_cfg_template % (
test_name,
str(src),
str(exec_mode),
extra_deps,
extra_compiler_flags)
self.targets_file.write(targets_cfg.unittests_template.format(test_name=test_name,test_cc=str(src),deps=deps,
extra_compiler_flags=extra_compiler_flags).encode("utf-8"))
self.total_test = self.total_test + 1
def flush_tests(self):
self.targets_file.write(targets_cfg.unittests_template.format(tests=self.tests_cfg).encode("utf-8"))
self.tests_cfg = ""


@ -11,36 +11,210 @@ rocksdb_target_header_template = \
# This file is a Facebook-specific integration for buck builds, so can
# only be validated by Facebook employees.
#
# @noautodeps @nocodemods
load("//rocks/buckifier:defs.bzl", "cpp_library_wrapper","rocks_cpp_library_wrapper","cpp_binary_wrapper","cpp_unittest_wrapper","fancy_bench_wrapper","add_c_test_wrapper")
load("@fbcode_macros//build_defs:auto_headers.bzl", "AutoHeaders")
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load(":defs.bzl", "test_binary")
REPO_PATH = package_name() + "/"
ROCKSDB_COMPILER_FLAGS_0 = [
"-fno-builtin-memcmp",
# Needed to compile in fbcode
"-Wno-expansion-to-defined",
# Added missing flags from output of build_detect_platform
"-Wnarrowing",
"-DROCKSDB_NO_DYNAMIC_EXTENSION",
]
ROCKSDB_EXTERNAL_DEPS = [
("bzip2", None, "bz2"),
("snappy", None, "snappy"),
("zlib", None, "z"),
("gflags", None, "gflags"),
("lz4", None, "lz4"),
("zstd", None, "zstd"),
]
ROCKSDB_OS_DEPS_0 = [
(
"linux",
["third-party//numa:numa", "third-party//liburing:uring", "third-party//tbb:tbb"],
),
(
"macos",
["third-party//tbb:tbb"],
),
]
ROCKSDB_OS_PREPROCESSOR_FLAGS_0 = [
(
"linux",
[
"-DOS_LINUX",
"-DROCKSDB_FALLOCATE_PRESENT",
"-DROCKSDB_MALLOC_USABLE_SIZE",
"-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX",
"-DROCKSDB_RANGESYNC_PRESENT",
"-DROCKSDB_SCHED_GETCPU_PRESENT",
"-DROCKSDB_IOURING_PRESENT",
"-DHAVE_SSE42",
"-DLIBURING",
"-DNUMA",
"-DROCKSDB_PLATFORM_POSIX",
"-DROCKSDB_LIB_IO_POSIX",
"-DTBB",
],
),
(
"macos",
[
"-DOS_MACOSX",
"-DROCKSDB_PLATFORM_POSIX",
"-DROCKSDB_LIB_IO_POSIX",
"-DTBB",
],
),
(
"windows",
[ "-DOS_WIN", "-DWIN32", "-D_MBCS", "-DWIN64", "-DNOMINMAX" ]
),
]
ROCKSDB_PREPROCESSOR_FLAGS = [
"-DROCKSDB_SUPPORT_THREAD_LOCAL",
# Flags to enable libs we include
"-DSNAPPY",
"-DZLIB",
"-DBZIP2",
"-DLZ4",
"-DZSTD",
"-DZSTD_STATIC_LINKING_ONLY",
"-DGFLAGS=gflags",
# Added missing flags from output of build_detect_platform
"-DROCKSDB_BACKTRACE",
]
# Directories with files for #include
ROCKSDB_INCLUDE_PATHS = [
"",
"include",
]
ROCKSDB_ARCH_PREPROCESSOR_FLAGS = {{
"x86_64": [
"-DHAVE_PCLMUL",
],
}}
build_mode = read_config("fbcode", "build_mode")
is_opt_mode = build_mode.startswith("opt")
# -DNDEBUG is added by default in opt mode in fbcode. But adding it twice
# doesn't harm and avoid forgetting to add it.
ROCKSDB_COMPILER_FLAGS = ROCKSDB_COMPILER_FLAGS_0 + (["-DNDEBUG"] if is_opt_mode else [])
sanitizer = read_config("fbcode", "sanitizer")
# Do not enable jemalloc if sanitizer presents. RocksDB will further detect
# whether the binary is linked with jemalloc at runtime.
ROCKSDB_OS_PREPROCESSOR_FLAGS = ROCKSDB_OS_PREPROCESSOR_FLAGS_0 + ([(
"linux",
["-DROCKSDB_JEMALLOC"],
)] if sanitizer == "" else [])
ROCKSDB_OS_DEPS = ROCKSDB_OS_DEPS_0 + ([(
"linux",
["third-party//jemalloc:headers"],
)] if sanitizer == "" else [])
ROCKSDB_LIB_DEPS = [
":rocksdb_lib",
":rocksdb_test_lib",
] if not is_opt_mode else [":rocksdb_lib"]
"""
library_template = """
cpp_library_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], headers={headers}, link_whole={link_whole}, extra_test_libs={extra_test_libs})
cpp_library(
name = "{name}",
srcs = [{srcs}],
{headers_attr_prefix}headers = {headers},
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
compiler_flags = ROCKSDB_COMPILER_FLAGS,
os_deps = ROCKSDB_OS_DEPS,
os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
include_paths = ROCKSDB_INCLUDE_PATHS,
deps = [{deps}],
external_deps = ROCKSDB_EXTERNAL_DEPS{extra_external_deps},
link_whole = {link_whole},
)
"""
rocksdb_library_template = """
rocks_cpp_library_wrapper(name="{name}", srcs=[{srcs}], headers={headers})
cpp_library(
name = "{name}",
srcs = [{srcs}],
{headers_attr_prefix}headers = {headers},
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
compiler_flags = ROCKSDB_COMPILER_FLAGS,
os_deps = ROCKSDB_OS_DEPS,
os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
include_paths = ROCKSDB_INCLUDE_PATHS,
deps = ROCKSDB_LIB_DEPS,
external_deps = ROCKSDB_EXTERNAL_DEPS,
)
"""
binary_template = """
cpp_binary_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], extra_preprocessor_flags=[{extra_preprocessor_flags}], extra_bench_libs={extra_bench_libs})
cpp_binary(
name = "{name}",
srcs = [{srcs}],
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
compiler_flags = ROCKSDB_COMPILER_FLAGS,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
include_paths = ROCKSDB_INCLUDE_PATHS,
deps = [{deps}],
external_deps = ROCKSDB_EXTERNAL_DEPS,
)
"""
test_cfg_template = """ [
"%s",
"%s",
"%s",
%s,
%s,
],
"""
unittests_template = """
cpp_unittest_wrapper(name="{test_name}",
srcs=["{test_cc}"],
deps={deps},
extra_compiler_flags={extra_compiler_flags})
"""
fancy_bench_template = """
fancy_bench_wrapper(suite_name="{name}", binary_to_bench_to_metric_list_map={bench_config}, slow={slow}, expected_runtime={expected_runtime}, sl_iterations={sl_iterations}, regression_threshold={regression_threshold})
# [test_name, test_src, test_type, extra_deps, extra_compiler_flags]
ROCKS_TESTS = [
{tests}]
# Generate a test rule for each entry in ROCKS_TESTS
# Do not build the tests in opt mode, since SyncPoint and other test code
# will not be included.
[
cpp_unittest(
name = test_name,
srcs = [test_cc],
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS,
compiler_flags = ROCKSDB_COMPILER_FLAGS + extra_compiler_flags,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
include_paths = ROCKSDB_INCLUDE_PATHS,
deps = [":rocksdb_test_lib"] + extra_deps,
external_deps = ROCKSDB_EXTERNAL_DEPS + [
("googletest", None, "gtest"),
],
)
for test_name, test_cc, parallelism, extra_deps, extra_compiler_flags in ROCKS_TESTS
if not is_opt_mode
]
"""


@ -45,11 +45,11 @@ if test -z "$OUTPUT"; then
exit 1
fi
# we depend on C++17, but should be compatible with newer standards
# we depend on C++11, but should be compatible with newer standards
if [ "$ROCKSDB_CXX_STANDARD" ]; then
PLATFORM_CXXFLAGS="-std=$ROCKSDB_CXX_STANDARD"
else
PLATFORM_CXXFLAGS="-std=c++17"
PLATFORM_CXXFLAGS="-std=c++11"
fi
# we currently depend on POSIX platform
@ -58,13 +58,15 @@ COMMON_FLAGS="-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX"
# Default to fbcode gcc on internal fb machines
if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
FBCODE_BUILD="true"
# If we're compiling with TSAN or shared lib, we need pic build
# If we're compiling with TSAN we need pic build
PIC_BUILD=$COMPILE_WITH_TSAN
if [ "$LIB_MODE" == "shared" ]; then
PIC_BUILD=1
fi
if [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM010" ]; then
source "$PWD/build_tools/fbcode_config_platform010.sh"
if [ -n "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
# we need this to build with MySQL. Don't use for other purposes.
source "$PWD/build_tools/fbcode_config4.8.1.sh"
elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_5xx" ]; then
source "$PWD/build_tools/fbcode_config.sh"
elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007" ]; then
source "$PWD/build_tools/fbcode_config_platform007.sh"
elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM009" ]; then
source "$PWD/build_tools/fbcode_config_platform009.sh"
else
@ -174,7 +176,7 @@ case "$TARGET_OS" in
fi
if test "$ROCKSDB_USE_IO_URING" -ne 0; then
# check for liburing
$CXX $PLATFORM_CXXFLAGS -x c++ - -luring -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -luring -o /dev/null 2>/dev/null <<EOF
#include <liburing.h>
int main() {
struct io_uring ring;
@ -269,7 +271,7 @@ esac
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}"
JAVA_LDFLAGS="$PLATFORM_LDFLAGS"
JAVA_STATIC_LDFLAGS="$PLATFORM_LDFLAGS"
JAVAC_ARGS="-source 8"
JAVAC_ARGS="-source 7"
if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then
# Cross-compiling; do not try any compilation tests.
@ -282,7 +284,7 @@ if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then
else
if ! test $ROCKSDB_DISABLE_FALLOCATE; then
# Test whether fallocate is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <fcntl.h>
#include <linux/falloc.h>
int main() {
@ -298,7 +300,7 @@ EOF
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# Test whether Snappy library is installed
# http://code.google.com/p/snappy/
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <snappy.h>
int main() {}
EOF
@ -313,7 +315,7 @@ EOF
# Test whether gflags library is installed
# http://gflags.github.io/gflags/
# check if the namespace is gflags
if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
if $CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF
#include <gflags/gflags.h>
using namespace GFLAGS_NAMESPACE;
int main() {}
@ -322,7 +324,7 @@ EOF
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
# check if namespace is gflags
elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF
#include <gflags/gflags.h>
using namespace gflags;
int main() {}
@ -331,7 +333,7 @@ EOF
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=gflags"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
# check if namespace is google
elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF
#include <gflags/gflags.h>
using namespace google;
int main() {}
@ -344,7 +346,7 @@ EOF
if ! test $ROCKSDB_DISABLE_ZLIB; then
# Test whether zlib library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <zlib.h>
int main() {}
EOF
@ -357,7 +359,7 @@ EOF
if ! test $ROCKSDB_DISABLE_BZIP; then
# Test whether bzip library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <bzlib.h>
int main() {}
EOF
@ -370,7 +372,7 @@ EOF
if ! test $ROCKSDB_DISABLE_LZ4; then
# Test whether lz4 library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <lz4.h>
#include <lz4hc.h>
int main() {}
@ -397,7 +399,7 @@ EOF
if ! test $ROCKSDB_DISABLE_NUMA; then
# Test whether numa is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -lnuma 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null -lnuma 2>/dev/null <<EOF
#include <numa.h>
#include <numaif.h>
int main() {}
@ -411,7 +413,7 @@ EOF
if ! test $ROCKSDB_DISABLE_TBB; then
# Test whether tbb is available
$CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -ltbb 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o /dev/null -ltbb 2>/dev/null <<EOF
#include <tbb/tbb.h>
int main() {}
EOF
@ -424,7 +426,7 @@ EOF
if ! test $ROCKSDB_DISABLE_JEMALLOC; then
# Test whether jemalloc is available
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -ljemalloc \
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null -ljemalloc \
2>/dev/null; then
# This will enable some preprocessor identifiers in the Makefile
JEMALLOC=1
@ -445,7 +447,7 @@ EOF
fi
if ! test $JEMALLOC && ! test $ROCKSDB_DISABLE_TCMALLOC; then
# jemalloc is not available. Let's try tcmalloc
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o \
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null \
-ltcmalloc 2>/dev/null; then
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltcmalloc"
JAVA_LDFLAGS="$JAVA_LDFLAGS -ltcmalloc"
@ -454,7 +456,7 @@ EOF
if ! test $ROCKSDB_DISABLE_MALLOC_USABLE_SIZE; then
# Test whether malloc_usable_size is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <malloc.h>
int main() {
size_t res = malloc_usable_size(0);
@ -469,7 +471,7 @@ EOF
if ! test $ROCKSDB_DISABLE_MEMKIND; then
# Test whether memkind library is installed
$CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -lmemkind 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -lmemkind -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <memkind.h>
int main() {
memkind_malloc(MEMKIND_DAX_KMEM, 1024);
@ -485,7 +487,7 @@ EOF
if ! test $ROCKSDB_DISABLE_PTHREAD_MUTEX_ADAPTIVE_NP; then
# Test whether PTHREAD_MUTEX_ADAPTIVE_NP mutex type is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <pthread.h>
int main() {
int x = PTHREAD_MUTEX_ADAPTIVE_NP;
@ -500,7 +502,7 @@ EOF
if ! test $ROCKSDB_DISABLE_BACKTRACE; then
# Test whether backtrace is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <execinfo.h>
int main() {
void* frames[1];
@ -512,7 +514,7 @@ EOF
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE"
else
# Test whether execinfo library is installed
$CXX $PLATFORM_CXXFLAGS -lexecinfo -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -lexecinfo -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <execinfo.h>
int main() {
void* frames[1];
@ -529,7 +531,7 @@ EOF
if ! test $ROCKSDB_DISABLE_PG; then
# Test if -pg is supported
$CXX $PLATFORM_CXXFLAGS -pg -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -pg -x c++ - -o /dev/null 2>/dev/null <<EOF
int main() {
return 0;
}
@ -541,7 +543,7 @@ EOF
if ! test $ROCKSDB_DISABLE_SYNC_FILE_RANGE; then
# Test whether sync_file_range is supported for compatibility with an old glibc
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <fcntl.h>
int main() {
int fd = open("/dev/null", 0);
@ -555,7 +557,7 @@ EOF
if ! test $ROCKSDB_DISABLE_SCHED_GETCPU; then
# Test whether sched_getcpu is supported
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <sched.h>
int main() {
int cpuid = sched_getcpu();
@ -569,7 +571,7 @@ EOF
if ! test $ROCKSDB_DISABLE_AUXV_GETAUXVAL; then
# Test whether getauxval is supported
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <sys/auxv.h>
int main() {
uint64_t auxv = getauxval(AT_HWCAP);
@ -583,7 +585,7 @@ EOF
if ! test $ROCKSDB_DISABLE_ALIGNED_NEW; then
# Test whether c++17 aligned-new is supported
$CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o /dev/null 2>/dev/null <<EOF
struct alignas(1024) t {int a;};
int main() {}
EOF
@ -593,7 +595,7 @@ EOF
fi
if ! test $ROCKSDB_DISABLE_BENCHMARK; then
# Test whether google benchmark is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null -lbenchmark -lpthread 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null -lbenchmark 2>/dev/null <<EOF
#include <benchmark/benchmark.h>
int main() {}
EOF
@ -604,10 +606,10 @@ EOF
fi
# TODO(tec): Fix -Wshorten-64-to-32 errors on FreeBSD and enable the warning.
# -Wshorten-64-to-32 breaks compilation on FreeBSD aarch64 and i386
if ! { [ "$TARGET_OS" = FreeBSD ] && [ "$TARGET_ARCHITECTURE" = arm64 -o "$TARGET_ARCHITECTURE" = i386 ]; }; then
# -Wshorten-64-to-32 breaks compilation on FreeBSD i386
if ! [ "$TARGET_OS" = FreeBSD -a "$TARGET_ARCHITECTURE" = i386 ]; then
# Test whether -Wshorten-64-to-32 is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -Wshorten-64-to-32 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null -Wshorten-64-to-32 2>/dev/null <<EOF
int main() {}
EOF
if [ "$?" = 0 ]; then
@ -615,27 +617,34 @@ EOF
fi
fi
# shall we use HDFS?
if test "$USE_HDFS"; then
if test -z "$JAVA_HOME"; then
echo "JAVA_HOME has to be set for HDFS usage." >&2
exit 1
fi
HDFS_CCFLAGS="$HDFS_CCFLAGS -I$JAVA_HOME/include -I$JAVA_HOME/include/linux -DUSE_HDFS -I$HADOOP_HOME/include"
HDFS_LDFLAGS="$HDFS_LDFLAGS -lhdfs -L$JAVA_HOME/jre/lib/amd64 -L$HADOOP_HOME/lib/native"
HDFS_LDFLAGS="$HDFS_LDFLAGS -L$JAVA_HOME/jre/lib/amd64/server -L$GLIBC_RUNTIME_PATH/lib"
HDFS_LDFLAGS="$HDFS_LDFLAGS -ldl -lverify -ljava -ljvm"
COMMON_FLAGS="$COMMON_FLAGS $HDFS_CCFLAGS"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS $HDFS_LDFLAGS"
JAVA_LDFLAGS="$JAVA_LDFLAGS $HDFS_LDFLAGS"
fi
if test "0$PORTABLE" -eq 0; then
if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
# Tune for this POWER processor, treating '+' models as base models
POWER=`LD_SHOW_AUXV=1 /bin/true | grep AT_PLATFORM | grep -E -o power[0-9]+`
COMMON_FLAGS="$COMMON_FLAGS -mcpu=$POWER -mtune=$POWER "
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
COMMON_FLAGS="$COMMON_FLAGS -march=z10 "
elif test -n "`echo $TARGET_ARCHITECTURE | grep -e^arm -e^aarch64`"; then
# TODO: Handle this with appropriate options.
COMMON_FLAGS="$COMMON_FLAGS"
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^aarch64`"; then
COMMON_FLAGS="$COMMON_FLAGS"
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ \
-march=native - -o /dev/null 2>/dev/null; then
COMMON_FLAGS="$COMMON_FLAGS -march=native "
else
COMMON_FLAGS="$COMMON_FLAGS -march=z196 "
fi
COMMON_FLAGS="$COMMON_FLAGS"
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then
RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-)
COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}"
elif [ "$TARGET_OS" == "IOS" ]; then
COMMON_FLAGS="$COMMON_FLAGS"
elif [ "$TARGET_OS" == "AIX" ] || [ "$TARGET_OS" == "SunOS" ]; then
@ -652,21 +661,11 @@ else
TRY_SSE_ETC="1"
fi
if test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
COMMON_FLAGS="$COMMON_FLAGS -march=z196 "
fi
if test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then
RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-)
COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}"
fi
if [[ "${PLATFORM}" == "OS_MACOSX" ]]; then
# For portability compile for macOS 10.12 (2016) or newer
COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.12"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.12"
# -mmacosx-version-min must come first here.
PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.12 $PLATFORM_SHARED_LDFLAGS"
PLATFORM_SHARED_LDFLAGS="$PLATFORM_SHARED_LDFLAGS -mmacosx-version-min=10.12"
PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.12"
JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.12"
JAVA_STATIC_DEPS_LDFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
@ -713,7 +712,7 @@ if test "$TRY_SSE_ETC"; then
TRY_LZCNT="-mlzcnt"
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
#include <nmmintrin.h>
int main() {
@ -727,7 +726,7 @@ elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_PCLMUL -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_PCLMUL -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
#include <wmmintrin.h>
int main() {
@ -744,7 +743,7 @@ elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use PCLMUL intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_AVX2 -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_AVX2 -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
#include <immintrin.h>
int main() {
@ -759,7 +758,7 @@ elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_BMI -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_BMI -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
#include <immintrin.h>
int main(int argc, char *argv[]) {
@ -773,7 +772,7 @@ elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use BMI intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_LZCNT -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_LZCNT -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
#include <immintrin.h>
int main(int argc, char *argv[]) {
@ -787,7 +786,7 @@ elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use LZCNT intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
int main() {
uint64_t a = 0xffffFFFFffffFFFF;
@ -800,15 +799,31 @@ if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_UINT128_EXTENSION"
fi
# thread_local is part of C++11 and later (TODO: clean up this define)
# iOS doesn't support thread-local storage, but this check would erroneously
# succeed because the cross-compiler flags are added by the Makefile, not this
# script.
if [ "$PLATFORM" != IOS ]; then
$CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#if defined(_MSC_VER) && !defined(__thread)
#define __thread __declspec(thread)
#endif
int main() {
static __thread int tls;
(void)tls;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SUPPORT_THREAD_LOCAL"
fi
fi
if [ "$FBCODE_BUILD" != "true" -a "$PLATFORM" = OS_LINUX ]; then
$CXX $COMMON_FLAGS $PLATFORM_SHARED_CFLAGS -x c++ -c - -o test_dl.o 2>/dev/null <<EOF
void dummy_func() {}
EOF
if [ "$?" = 0 ]; then
$CXX $COMMON_FLAGS $PLATFORM_SHARED_LDFLAGS test_dl.o -o test.o 2>/dev/null
$CXX $COMMON_FLAGS $PLATFORM_SHARED_LDFLAGS test_dl.o -o /dev/null 2>/dev/null
if [ "$?" = 0 ]; then
EXEC_LDFLAGS+="-ldl"
rm -f test_dl.o
@ -816,20 +831,6 @@ EOF
fi
fi
# check for F_FULLFSYNC
$CXX $PLATFORM_CXXFALGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <fcntl.h>
int main() {
fcntl(0, F_FULLFSYNC);
return 0;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_FULLFSYNC"
fi
rm -f test.o test_dl.o
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
@ -880,8 +881,8 @@ if test -n "$WITH_JEMALLOC_FLAG"; then
echo "WITH_JEMALLOC_FLAG=$WITH_JEMALLOC_FLAG" >> "$OUTPUT"
fi
echo "LUA_PATH=$LUA_PATH" >> "$OUTPUT"
if test -n "$USE_FOLLY"; then
echo "USE_FOLLY=$USE_FOLLY" >> "$OUTPUT"
if test -n "$USE_FOLLY_DISTRIBUTED_MUTEX"; then
echo "USE_FOLLY_DISTRIBUTED_MUTEX=$USE_FOLLY_DISTRIBUTED_MUTEX" >> "$OUTPUT"
fi
if test -n "$PPC_LIBC_IS_GNU"; then
echo "PPC_LIBC_IS_GNU=$PPC_LIBC_IS_GNU" >> "$OUTPUT"


@ -1,36 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Check for some simple mistakes that should prevent commit or push
BAD=""
git grep 'namespace rocksdb' -- '*.[ch]*'
if [ "$?" != "1" ]; then
echo "^^^^^ Do not hardcode namespace rocksdb. Use ROCKSDB_NAMESPACE"
BAD=1
fi
git grep -i 'nocommit' -- ':!build_tools/check-sources.sh'
if [ "$?" != "1" ]; then
echo "^^^^^ Code was not intended to be committed"
BAD=1
fi
git grep '<rocksdb/' -- ':!build_tools/check-sources.sh'
if [ "$?" != "1" ]; then
echo '^^^^^ Use double-quotes as in #include "rocksdb/something.h"'
BAD=1
fi
git grep 'using namespace' -- ':!build_tools' ':!docs' \
':!third-party/folly/folly/lang/Align.h' \
':!third-party/gtest-1.8.1/fused-src/gtest/gtest.h'
if [ "$?" != "1" ]; then
echo '^^^^ Do not use "using namespace"'
BAD=1
fi
if [ "$BAD" ]; then
exit 1
fi


@ -0,0 +1,19 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/5.x/centos7-native/c447969
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/1bd23f9917738974ad0ff305aa23eb5f93f18305/9.0.0/centos7-native/c9f9104
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/5.x/gcc-5-glibc-2.23/339d858
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.23/gcc-5-glibc-2.23/ca1d1c0
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/gcc-5-glibc-2.23/9bc6787
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/gcc-5-glibc-2.23/9bc6787
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/gcc-5-glibc-2.23/9bc6787
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/gcc-5-glibc-2.23/9bc6787
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/gcc-5-glibc-2.23/03859b5
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/gcc-5-glibc-2.23/9bc6787
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/gcc-5-glibc-2.23/0c8f76d
NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/gcc-5-glibc-2.23/9bc6787
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/gcc-5-glibc-2.23/b443de1
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/gcc-5-glibc-2.23/9bc6787
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/4.0.9-36_fbk5_2933_gd092e3f/gcc-5-glibc-2.23/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/2e3cb7d119b3cea5f1e738cc13a1ac69f49eb875/2.29.1/centos7-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/gcc-5-glibc-2.23/9bc6787
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.2.3/gcc-5-glibc-2.23/65372bd


@ -0,0 +1,20 @@
# shellcheck disable=SC2148
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/cf7d14c625ce30bae1a4661c2319c5a283e4dd22/4.8.1/centos6-native/cc6c9dc
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/8598c375b0e94e1448182eb3df034704144a838d/stable/centos6-native/3f16ddd
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/d6e0a7da6faba45f5e5b1638f9edd7afc2f34e7d/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/d282e6e8f3d20f4e40a516834847bdc038e07973/2.17/gcc-4.8.1-glibc-2.17/99df8fc
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/8c38a4c1e52b4c2cc8a9cdc31b9c947ed7dbfcb4/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/0882df3713c7a84f15abe368dc004581f20b39d7/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/740325875f6729f42d28deaa2147b0854f3a347e/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0e790b441e2d9acd68d51e1d2e028f88c6a79ddf/r131/gcc-4.8.1-glibc-2.17/c3f970a
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/9455f75ff7f4831dc9fda02a6a0f8c68922fad8f/1.0.0/gcc-4.8.1-glibc-2.17/c3f970a
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/f001a51b2854957676d07306ef3abf67186b5c8b/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/fc8a13ca1fffa4d0765c716c5a0b49f0c107518f/master/gcc-4.8.1-glibc-2.17/8d31e51
NUMA_BASE=/mnt/gvfs/third-party2/numa/17c514c4d102a25ca15f4558be564eeed76f4b6a/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/ad576de2a1ea560c4d3434304f0fc4e079bede42/trunk/gcc-4.8.1-glibc-2.17/675d945
TBB_BASE=/mnt/gvfs/third-party2/tbb/9d9a554877d0c5bef330fe818ab7178806dd316a/4.0_update2/gcc-4.8.1-glibc-2.17/c3f970a
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/7c111ff27e0c466235163f00f280a9d617c3d2ec/4.0.9-36_fbk5_2933_gd092e3f/gcc-4.8.1-glibc-2.17/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/b7fd454c4b10c6a81015d4524ed06cdeab558490/2.26/centos6-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d7f4d4d86674a57668e3a96f76f0e17dd0eb8765/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a
LUA_BASE=/mnt/gvfs/third-party2/lua/61e4abf5813bbc39bc4f548757ccfcadde175a48/5.2.3/centos6-native/730f94e


@ -0,0 +1,20 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/7.x/centos7-native/b2ef2b6
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/963d9aeda70cc4779885b1277484fe7544a04e3e/9.0.0/platform007/9e92d53/
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/7.x/platform007/5620abc
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.26/platform007/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/platform007/ca4da3d
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/platform007/ca4da3d
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/platform007/15a3614
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/platform007/c26c002
NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/platform007/ca4da3d
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/platform007/6f3e0a9
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/platform007/ca4da3d
LIBURING_BASE=/mnt/gvfs/third-party2/liburing/79427253fd0d42677255aacfe6d13bfe63f752eb/20190828/platform007/ca4da3d
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/fb/platform007/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/ab9f09bba370e7066cafd4eb59752db93f2e8312/2.29.1/platform007/15a3614
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/platform007/ca4da3d
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832

View File

@ -18,5 +18,3 @@ KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/32b8a2407b634df3f8f948
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/08634589372fa5f237bfd374e8c644a8364e78c1/2.32/platform009/ba86d1f/
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/6ae525939ad02e5e676855082fbbc7828dbafeac/3.15.0/platform009/7f3b187
LUA_BASE=/mnt/gvfs/third-party2/lua/162efd9561a3d21f6869f4814011e9cf1b3ff4dc/5.3.4/platform009/a6271c4
BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/30bf49ad6414325e17f3425b0edcb64239427ae3/1.6.1/platform009/7f3b187
BOOST_BASE=/mnt/gvfs/third-party2/boost/201b7d74941e54b436dfa364a063aa6d2cd7de4c/1.69.0/platform009/8a7ffdf

View File

@ -1,22 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# The file is generated using update_dependencies.sh.
GCC_BASE=/mnt/gvfs/third-party2/gcc/e40bde78650fa91b8405a857e3f10bf336633fb0/11.x/centos7-native/886b5eb
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/2043340983c032915adbb6f78903dc855b65aee8/12/platform010/9520e0f
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/c00dcc6a3e4125c7e8b248e9a79c14b78ac9e0ca/11.x/platform010/5684a5a
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0b9c8e4b060eda62f3bc1c6127bbe1256697569b/2.34/platform010/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/bc9647f7912b131315827d65cb6189c21f381d05/1.1.3/platform010/76ebdda
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/a6f5f3f1d063d2d00cd02fc12f0f05fc3ab3a994/1.2.11/platform010/76ebdda
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/09703139cfc376bd8a82642385a0e97726b28287/1.0.6/platform010/76ebdda
LZ4_BASE=/mnt/gvfs/third-party2/lz4/60220d6a5bf7722b9cc239a1368c596619b12060/1.9.1/platform010/76ebdda
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/50eace8143eaaea9473deae1f3283e0049e05633/1.4.x/platform010/64091f4
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/5d27e5919771603da06000a027b12f799e58a4f7/2.2.0/platform010/76ebdda
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/b62912d333ef33f9760efa6219dbe3fe6abb3b0e/master/platform010/f57cc4a
NUMA_BASE=/mnt/gvfs/third-party2/numa/6b412770957aa3c8a87e5e0dcd8cc2f45f393bc0/2.0.11/platform010/76ebdda
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/52f69816e936e147664ad717eb71a1a0e9dc973a/1.4/platform010/5074a48
TBB_BASE=/mnt/gvfs/third-party2/tbb/c9cc192099fa84c0dcd0ffeedd44a373ad6e4925/2018_U5/platform010/76ebdda
LIBURING_BASE=/mnt/gvfs/third-party2/liburing/a98e2d137007e3ebf7f33bd6f99c2c56bdaf8488/20210212/platform010/76ebdda
BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/780c7a0f9cf0967961e69ad08e61cddd85d61821/trunk/platform010/76ebdda
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/02d9f76aaaba580611cf75e741753c800c7fdc12/fb/platform010/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/938dc3f064ef3a48c0446f5b11d788d50b3eb5ee/2.37/centos7-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/429a6b3203eb415f1599bd15183659153129188e/3.15.0/platform010/76ebdda
LUA_BASE=/mnt/gvfs/third-party2/lua/363787fa5cac2a8aa20638909210443278fa138e/5.3.4/platform010/9079c97

View File

@ -21,7 +21,6 @@ LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
@ -30,31 +29,23 @@ if ! test $ROCKSDB_DISABLE_SNAPPY; then
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
fi
if test -z $PIC_BUILD; then
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
fi
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
@ -62,7 +53,6 @@ if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
@ -172,4 +162,6 @@ else
LUA_LIB=" $LUA_PATH/lib/liblua_pic.a"
fi
USE_FOLLY_DISTRIBUTED_MUTEX=1
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

View File

@ -0,0 +1,120 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ compiler and also
# uses jemalloc
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_4.8.1.sh"
# location of libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
# location of glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
# location of snappy headers and libraries
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
# location of bzip headers and libraries
BZIP2_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP2_LIBS=" $BZIP2_BASE/lib/libbz2.a"
LZ4_INCLUDE=" -I $LZ4_BASE/include"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
ZSTD_INCLUDE=" -I $ZSTD_BASE/include"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include"
JEMALLOC_LIB="$JEMALLOC_BASE/lib/libjemalloc.a"
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
# location of tbb
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP2_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
CXX="$GCC_BASE/bin/gcc-ar"
CFLAGS="-B$BINUTILS/gold -m64 -mtune=generic"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_INCLUDE="$CLANG_LIB/clang/*/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include/"
CFLAGS="-B$BINUTILS/gold -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1 "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CXXFLAGS="-nostdinc++"
fi
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42"
CFLAGS+=" -DSNAPPY -DGFLAGS=google -DZLIB -DBZIP2 -DLZ4 -DZSTD -DNUMA -DTBB"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
LUA_PATH="$LUA_BASE"
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE LUA_PATH

View File

@ -0,0 +1,170 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
# uses jemalloc
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take PIC versions of libraries from fbcode. Libraries that don't have a PIC variant will not be included
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_platform007.sh"
CFLAGS=""
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
if test -z $PIC_BUILD; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
fi
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD"
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
if test -z $PIC_BUILD; then
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
else
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
fi
CFLAGS+=" -DGFLAGS=gflags"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a"
if test -z $PIC_BUILD; then
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
fi
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
if test -z $PIC_BUILD; then
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
else
TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
fi
CFLAGS+=" -DTBB"
# location of LIBURING
LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
if test -z $PIC_BUILD; then
LIBURING_LIBS="$LIBURING_BASE/lib/liburing.a"
else
LIBURING_LIBS="$LIBURING_BASE/lib/liburing_pic.a"
fi
CFLAGS+=" -DLIBURING"
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build"
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS/gold"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CFLAGS+=" -Wno-expansion-to-defined "
CXXFLAGS="-nostdinc++"
fi
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS"
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
# lua not supported because it's on track for deprecation, I think
LUA_PATH=
LUA_LIB=
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
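Per the PIC_BUILD note at the top of this script, setting the variable swaps in the _pic variants of the fbcode archives, which is what a shared-library build needs. A minimal usage sketch (the entry point and make target here are my assumptions, not taken from this diff):

    # Hypothetical invocation: pick PIC archives, then build the shared library.
    PIC_BUILD=1 source build_tools/fbcode_config_platform007.sh
    make shared_lib   # linking librocksdb.so requires position-independent objects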

View File

@ -21,75 +21,78 @@ LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
if test -z $PIC_BUILD; then
MAYBE_PIC=
else
MAYBE_PIC=_pic
fi
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
if test -z $PIC_BUILD; then
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
if ! test $ROCKSDB_DISABLE_ZLIB; then
if test -z $PIC_BUILD; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD"
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a"
if test -z $PIC_BUILD; then
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
else
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
fi
CFLAGS+=" -DGFLAGS=gflags"
BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
BOOST_INCLUDE=" -I $BOOST_BASE/include/"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a"
if test -z $PIC_BUILD; then
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a"
NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a"
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
fi
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a"
if test -z $PIC_BUILD; then
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
else
TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
fi
CFLAGS+=" -DTBB"
# location of LIBURING
LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a"
if test -z $PIC_BUILD; then
LIBURING_LIBS="$LIBURING_BASE/lib/liburing.a"
else
LIBURING_LIBS="$LIBURING_BASE/lib/liburing_pic.a"
fi
CFLAGS+=" -DLIBURING"
test "$USE_SSE" || USE_SSE=1
@ -99,9 +102,8 @@ export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
AS="$BINUTILS/as"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE $BOOST_INCLUDE"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
@ -147,7 +149,8 @@ CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS"
EXEC_LDFLAGS+=" -B$BINUTILS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform009/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform009/lib"
@ -156,9 +159,8 @@ EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64"
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
PLATFORM_LDFLAGS+=" -B$BINUTILS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
@ -166,4 +168,4 @@ VALGRIND_VER="$VALGRIND_BASE/bin/"
LUA_PATH=
LUA_LIB=
export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
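One side of the hunk above replaces the per-library if/else blocks with a MAYBE_PIC suffix computed once. A standalone sketch of the idiom (library and path are illustrative):

    # Decide once whether the _pic archive variants are wanted.
    if test -z "$PIC_BUILD"; then
      MAYBE_PIC=
    else
      MAYBE_PIC=_pic
    fi
    # Every library reference then collapses to a single line:
    LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"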

View File

@ -1,175 +0,0 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
# uses jemalloc
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take PIC versions of libraries from fbcode. Libraries that don't have a PIC variant will not be included
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_platform010.sh"
# Disallow using libraries from default locations as they might not be compatible with platform010 libraries.
CFLAGS=" --sysroot=/DOES/NOT/EXIST"
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/trunk"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib -B$LIBGCC_BASE/lib/gcc/x86_64-facebook-linux/trunk/"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
GLIBC_LIBS+=" -B$GLIBC_BASE/lib"
if test -z $PIC_BUILD; then
MAYBE_PIC=
else
MAYBE_PIC=_pic
fi
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
fi
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
CFLAGS+=" -DLZ4"
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a"
CFLAGS+=" -DGFLAGS=gflags"
BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a"
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a"
CFLAGS+=" -DTBB"
# location of LIBURING
LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a"
CFLAGS+=" -DLIBURING"
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
AS="$BINUTILS/as"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/clang/tools/scan-build/bin/scan-build"
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -I$GCC_BASE/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/install-tools/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include-fixed/"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -I$GLIBC_INCLUDE"
CFLAGS+=" -I$LIBGCC_BASE/include"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/x86_64-facebook-linux/"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/backward"
CFLAGS+=" -isystem $GLIBC_INCLUDE -I$GLIBC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -Wno-expansion-to-defined "
CXXFLAGS="-nostdinc++"
fi
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform010/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform010/lib"
EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
PLATFORM_LDFLAGS+=" -B$BINUTILS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

View File

@ -38,9 +38,7 @@ if [ "$CLANG_FORMAT_DIFF" ]; then
fi
else
# First try directly executing the possibilities
if clang-format-diff --help &> /dev/null < /dev/null; then
CLANG_FORMAT_DIFF=clang-format-diff
elif clang-format-diff.py --help &> /dev/null < /dev/null; then
if clang-format-diff.py --help &> /dev/null < /dev/null; then
CLANG_FORMAT_DIFF=clang-format-diff.py
elif $REPO_ROOT/clang-format-diff.py --help &> /dev/null < /dev/null; then
CLANG_FORMAT_DIFF=$REPO_ROOT/clang-format-diff.py
@ -122,16 +120,16 @@ uncommitted_code=`git diff HEAD`
# If there's no uncommitted changes, we assume user are doing post-commit
# format check, in which case we'll try to check the modified lines vs. the
# facebook/rocksdb.git main branch. Otherwise, we'll check format of the
# facebook/rocksdb.git master branch. Otherwise, we'll check format of the
# uncommitted code only.
if [ -z "$uncommitted_code" ]
then
# Attempt to get name of facebook/rocksdb.git remote.
[ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(LC_ALL=POSIX LANG=POSIX git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)"
[ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)"
# Fall back on 'origin' if that fails
[ "$FORMAT_REMOTE" ] || FORMAT_REMOTE=origin
# Use main branch from that remote
[ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/$(LC_ALL=POSIX LANG=POSIX git remote show $FORMAT_REMOTE | sed -n '/HEAD branch/s/.*: //p')"
# Use master branch from that remote
[ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/master"
# Get the common ancestor with that remote branch. Everything after that
# common ancestor would be considered the contents of a pull request, so
# should be relevant for formatting fixes.
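The hunk above toggles between hardcoding master and asking the remote which branch its HEAD points at. A hedged sketch of the dynamic form, assuming a remote named origin and the stock clang-format-diff.py:

    # Resolve the remote's default branch (main or master).
    FORMAT_REMOTE=origin
    FORMAT_UPSTREAM="$FORMAT_REMOTE/$(git remote show "$FORMAT_REMOTE" | sed -n '/HEAD branch/s/.*: //p')"
    # Restrict formatting checks to lines changed since the common ancestor.
    git diff -U0 "$(git merge-base "$FORMAT_UPSTREAM" HEAD)" | clang-format-diff.py -p 1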

View File

@ -1561,7 +1561,6 @@ sub save_stdin_stdout_stderr {
::die_bug("Can't dup STDERR: $!");
open $Global::original_stdin, "<&", "STDIN" or
::die_bug("Can't dup STDIN: $!");
$Global::is_terminal = (-t $Global::original_stderr) && !$ENV{'CIRCLECI'} && !$ENV{'TRAVIS'};
}
sub enough_file_handles {
@ -1841,17 +1840,12 @@ sub start_another_job {
}
}
$opt::min_progress_interval = 0;
sub init_progress {
# Uses:
# $opt::bar
# Returns:
# list of computers for progress output
$|=1;
if (not $Global::is_terminal) {
$opt::min_progress_interval = 30;
}
if($opt::bar) {
return("","");
}
@ -1876,9 +1870,6 @@ sub drain_job_queue {
}
my $last_header="";
my $sleep = 0.2;
my $last_left = 1000000000;
my $last_progress_time = 0;
my $ps_reported = 0;
do {
while($Global::total_running > 0) {
debug($Global::total_running, "==", scalar
@ -1889,37 +1880,13 @@ sub drain_job_queue {
close $job->fh(0,"w");
}
}
# When not connected to terminal, assume CI (e.g. CircleCI). In
# that case we want occasional progress output to prevent abort
# due to timeout with no output, but we also need to stop sending
# progress output if there has been no actual progress, so that
# the job can time out appropriately (CircleCI: 10m) in case of
# a hung test. But without special output, it is extremely
# annoying to diagnose which test is hung, so we add that using
# `ps` below.
if($opt::progress and
($Global::is_terminal or (time() - $last_progress_time) >= 30)) {
if($opt::progress) {
my %progress = progress();
if($last_header ne $progress{'header'}) {
print $Global::original_stderr "\n", $progress{'header'}, "\n";
$last_header = $progress{'header'};
}
if ($Global::is_terminal) {
print $Global::original_stderr "\r",$progress{'status'};
}
if ($last_left > $Global::left) {
if (not $Global::is_terminal) {
print $Global::original_stderr $progress{'status'},"\n";
}
$last_progress_time = time();
$ps_reported = 0;
} elsif (not $ps_reported and (time() - $last_progress_time) >= 60) {
# No progress in at least 60 seconds: run ps
print $Global::original_stderr "\n";
system("ps", "-wf");
$ps_reported = 1;
}
$last_left = $Global::left;
flush $Global::original_stderr;
}
if($Global::total_running < $Global::max_jobs_running
@ -1954,7 +1921,7 @@ sub drain_job_queue {
not $Global::start_no_new_jobs and not $Global::JobQueue->empty());
if($opt::progress) {
my %progress = progress();
print $Global::original_stderr $opt::progress_sep, $progress{'status'}, "\n";
print $Global::original_stderr "\r", $progress{'status'}, "\n";
flush $Global::original_stderr;
}
}
@ -1991,7 +1958,6 @@ sub progress {
compute_eta();
$eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
$this_eta, $left, $avgtime);
$Global::left = $left;
}
my $termcols = terminal_columns();
my @workers = sort keys %Global::host;
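The removed Perl above implements a CI keep-alive: when not attached to a terminal, print progress at most every 30 seconds, and run ps after roughly 60 seconds without progress so the hung test can be identified. A rough shell sketch of the same idea, separate from the parallel script (JOB_PID is a placeholder):

    # Hypothetical watchdog for an otherwise quiet CI job.
    while kill -0 "$JOB_PID" 2>/dev/null; do    # loop while the job process exists
      sleep 30
      echo "[watchdog] job still running"       # periodic output defeats no-output timeouts
      ps -wf                                    # a process listing helps spot a hung test
    done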

209
build_tools/precommit_checker.py Executable file
View File

@ -0,0 +1,209 @@
#!/usr/bin/env python2.7
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import commands
import subprocess
import sys
import re
import os
import time
#
# Simple logger
#
class Log:
def __init__(self, filename):
self.filename = filename
self.f = open(self.filename, 'w+', 0)
def caption(self, str):
line = "\n##### %s #####\n" % str
if self.f:
self.f.write("%s \n" % line)
else:
print(line)
def error(self, str):
data = "\n\n##### ERROR ##### %s" % str
if self.f:
self.f.write("%s \n" % data)
else:
print(data)
def log(self, str):
if self.f:
self.f.write("%s \n" % str)
else:
print(str)
#
# Shell Environment
#
class Env(object):
def __init__(self, logfile, tests):
self.tests = tests
self.log = Log(logfile)
def shell(self, cmd, path=os.getcwd()):
if path:
os.chdir(path)
self.log.log("==== shell session ===========================")
self.log.log("%s> %s" % (path, cmd))
status = subprocess.call("cd %s; %s" % (path, cmd), shell=True,
stdout=self.log.f, stderr=self.log.f)
self.log.log("status = %s" % status)
self.log.log("============================================== \n\n")
return status
def GetOutput(self, cmd, path=os.getcwd()):
if path:
os.chdir(path)
self.log.log("==== shell session ===========================")
self.log.log("%s> %s" % (path, cmd))
status, out = commands.getstatusoutput(cmd)
self.log.log("status = %s" % status)
self.log.log("out = %s" % out)
self.log.log("============================================== \n\n")
return status, out
#
# Pre-commit checker
#
class PreCommitChecker(Env):
def __init__(self, args):
Env.__init__(self, args.logfile, args.tests)
self.ignore_failure = args.ignore_failure
#
# Get commands for a given job from the determinator file
#
def get_commands(self, test):
status, out = self.GetOutput(
"RATIO=1 build_tools/rocksdb-lego-determinator %s" % test, ".")
return status, out
#
# Run a specific CI job
#
def run_test(self, test):
self.log.caption("Running test %s locally" % test)
# get commands for the CI job determinator
status, cmds = self.get_commands(test)
if status != 0:
self.log.error("Error getting commands for test %s" % test)
return False
# Parse the JSON to extract the commands to run
cmds = re.findall("'shell':'([^\']*)'", cmds)
if len(cmds) == 0:
self.log.log("No commands found")
return False
# Run commands
for cmd in cmds:
# Replace J=<..> with the local environment variable
if "J" in os.environ:
cmd = cmd.replace("J=1", "J=%s" % os.environ["J"])
cmd = cmd.replace("make ", "make -j%s " % os.environ["J"])
# Run the command
status = self.shell(cmd, ".")
if status != 0:
self.log.error("Error running command %s for test %s"
% (cmd, test))
return False
return True
#
# Run specified CI jobs
#
def run_tests(self):
if not self.tests:
self.log.error("Invalid args. Please provide tests")
return False
self.print_separator()
self.print_row("TEST", "RESULT")
self.print_separator()
result = True
for test in self.tests:
start_time = time.time()
self.print_test(test)
result = self.run_test(test)
elapsed_min = (time.time() - start_time) / 60
if not result:
self.log.error("Error running test %s" % test)
self.print_result("FAIL (%dm)" % elapsed_min)
if not self.ignore_failure:
return False
result = False
else:
self.print_result("PASS (%dm)" % elapsed_min)
self.print_separator()
return result
#
# Print a line
#
def print_separator(self):
print("".ljust(60, "-"))
#
# Print two columns
#
def print_row(self, c0, c1):
print("%s%s" % (c0.ljust(40), c1.ljust(20)))
def print_test(self, test):
print(test.ljust(40), end="")
sys.stdout.flush()
def print_result(self, result):
print(result.ljust(20))
#
# Main
#
parser = argparse.ArgumentParser(description='RocksDB pre-commit checker.')
# --log <logfile>
parser.add_argument('--logfile', default='/tmp/precommit-check.log',
help='Log file. Default is /tmp/precommit-check.log')
# --ignore_failure
parser.add_argument('--ignore_failure', action='store_true', default=False,
help='Continue running the remaining tests even if one fails')
# <test ....>
parser.add_argument('tests', nargs='+',
help='CI test(s) to run. e.g.: unit punit asan tsan ubsan')
args = parser.parse_args()
checker = PreCommitChecker(args)
print("Please follow log %s" % checker.log.filename)
if not checker.run_tests():
print("Error running tests. Please check log file %s"
% checker.log.filename)
sys.exit(1)
sys.exit(0)
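The checker resolves each named CI job into shell commands by querying the determinator and regex-extracting the 'shell' fields. The same extraction can be done by hand; a hedged one-off (job name illustrative):

    # Print the shell commands CI would run for the "unit" job.
    RATIO=1 build_tools/rocksdb-lego-determinator unit \
      | grep -o "'shell':'[^']*'" \
      | sed "s/^'shell':'//; s/'\$//"

To run jobs end to end instead, use the script itself, e.g. build_tools/precommit_checker.py unit asan.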

View File

@ -20,11 +20,26 @@ STAT_FILE=${STAT_FILE:-$(mktemp -t -u rocksdb_test_stats_XXXX)}
function cleanup {
rm -rf $DATA_DIR
rm -f $STAT_FILE.*
rm -f $STAT_FILE.fillseq
rm -f $STAT_FILE.readrandom
rm -f $STAT_FILE.overwrite
rm -f $STAT_FILE.memtablefillreadrandom
}
trap cleanup EXIT
if [ -z $GIT_BRANCH ]; then
git_br=`git rev-parse --abbrev-ref HEAD`
else
git_br=$(basename $GIT_BRANCH)
fi
if [ $git_br == "master" ]; then
git_br=""
else
git_br="."$git_br
fi
make release
# measure fillseq + fill up the DB for overwrite benchmark
@ -258,6 +273,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--value_size=100 \
--compression_type=none \
--compression_ratio=1 \
--hard_rate_limit=2 \
--write_buffer_size=134217728 \
--max_write_buffer_number=4 \
--level0_file_num_compaction_trigger=8 \
@ -270,10 +286,12 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--sync=0 \
--verify_checksum=1 \
--delete_obsolete_files_period_micros=314572800 \
--max_grandparent_overlap_factor=10 \
--use_plain_table=1 \
--open_files=-1 \
--mmap_read=1 \
--mmap_write=0 \
--memtablerep=prefix_hash \
--bloom_bits=10 \
--bloom_locality=1 \
--perf_level=0"
@ -360,7 +378,7 @@ function send_to_ods {
echo >&2 "ERROR: Key $key doesn't have a value."
return
fi
curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value" \
curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build$git_br&key=$key&value=$value" \
--connect-timeout 60
}
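With the $git_br suffix added above, results from non-master branches are reported under a branch-qualified ODS entity instead of polluting the main series. A sketch of the resulting entity names (branch name illustrative):

    git_br=perf-tuning                                     # from $GIT_BRANCH or rev-parse
    if [ "$git_br" = "master" ]; then git_br=""; else git_br=".$git_br"; fi
    echo "entity=rocksdb_build$git_br"                     # rocksdb_build.perf-tuning; bare rocksdb_build on master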

File diff suppressed because it is too large

View File

@ -42,7 +42,7 @@ $RunOnly.Add("c_test") | Out-Null
$RunOnly.Add("compact_on_deletion_collector_test") | Out-Null
$RunOnly.Add("merge_test") | Out-Null
$RunOnly.Add("stringappend_test") | Out-Null # Apparently incorrectly written
$RunOnly.Add("backup_engine_test") | Out-Null # Disabled
$RunOnly.Add("backupable_db_test") | Out-Null # Disabled
$RunOnly.Add("timer_queue_test") | Out-Null # Not a gtest
if($RunAll -and $SuiteRun -ne "") {
@ -491,3 +491,5 @@ if(!$script:success) {
}
exit 0

View File

@ -9,7 +9,6 @@ OUTPUT=""
function log_header()
{
echo "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved." >> "$OUTPUT"
echo "# The file is generated using update_dependencies.sh." >> "$OUTPUT"
}
@ -19,7 +18,7 @@ function log_variable()
}
TP2_LATEST="/data/users/$USER/fbsource/fbcode/third-party2/"
TP2_LATEST="/mnt/vol/engshare/fbcode/third-party2"
## $1 => lib name
## $2 => lib version (if not provided, will try to pick latest)
## $3 => platform (if not provided, will try to pick latest gcc)
@ -52,8 +51,6 @@ function get_lib_base()
result=`ls -1d $result/*/ | head -n1`
echo Finding link $result
# lib_name => LIB_NAME_BASE
local __res_var=${lib_name^^}"_BASE"
__res_var=`echo $__res_var | tr - _`
@ -64,10 +61,10 @@ function get_lib_base()
}
###########################################################
# platform010 dependencies #
# platform007 dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_platform010.sh"
OUTPUT="$BASEDIR/dependencies_platform007.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
@ -75,42 +72,40 @@ touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/11.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/12/platform010/*/`
GCC_BASE=`readlink -f $TP2_LATEST/gcc/7.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 11.x platform010
get_lib_base glibc 2.34 platform010
get_lib_base snappy LATEST platform010
get_lib_base zlib LATEST platform010
get_lib_base bzip2 LATEST platform010
get_lib_base lz4 LATEST platform010
get_lib_base zstd LATEST platform010
get_lib_base gflags LATEST platform010
get_lib_base jemalloc LATEST platform010
get_lib_base numa LATEST platform010
get_lib_base libunwind LATEST platform010
get_lib_base tbb 2018_U5 platform010
get_lib_base liburing LATEST platform010
get_lib_base benchmark LATEST platform010
get_lib_base libgcc 7.x platform007
get_lib_base glibc 2.26 platform007
get_lib_base snappy LATEST platform007
get_lib_base zlib LATEST platform007
get_lib_base bzip2 LATEST platform007
get_lib_base lz4 LATEST platform007
get_lib_base zstd LATEST platform007
get_lib_base gflags LATEST platform007
get_lib_base jemalloc LATEST platform007
get_lib_base numa LATEST platform007
get_lib_base libunwind LATEST platform007
get_lib_base tbb LATEST platform007
get_lib_base liburing LATEST platform007
get_lib_base kernel-headers fb platform010
get_lib_base kernel-headers fb platform007
get_lib_base binutils LATEST centos7-native
get_lib_base valgrind LATEST platform010
get_lib_base lua 5.3.4 platform010
get_lib_base valgrind LATEST platform007
get_lib_base lua 5.3.4 platform007
git diff $OUTPUT
###########################################################
# platform009 dependencies #
# 5.x dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_platform009.sh"
OUTPUT="$BASEDIR/dependencies.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
@ -118,32 +113,70 @@ touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/9.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/9.0.0/platform009/*/`
GCC_BASE=`readlink -f $TP2_LATEST/gcc/5.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 9.x platform009
get_lib_base glibc 2.30 platform009
get_lib_base snappy LATEST platform009
get_lib_base zlib LATEST platform009
get_lib_base bzip2 LATEST platform009
get_lib_base lz4 LATEST platform009
get_lib_base zstd LATEST platform009
get_lib_base gflags LATEST platform009
get_lib_base jemalloc LATEST platform009
get_lib_base numa LATEST platform009
get_lib_base libunwind LATEST platform009
get_lib_base tbb 2018_U5 platform009
get_lib_base liburing LATEST platform009
get_lib_base benchmark LATEST platform009
get_lib_base libgcc 5.x gcc-5-glibc-2.23
get_lib_base glibc 2.23 gcc-5-glibc-2.23
get_lib_base snappy LATEST gcc-5-glibc-2.23
get_lib_base zlib LATEST gcc-5-glibc-2.23
get_lib_base bzip2 LATEST gcc-5-glibc-2.23
get_lib_base lz4 LATEST gcc-5-glibc-2.23
get_lib_base zstd LATEST gcc-5-glibc-2.23
get_lib_base gflags LATEST gcc-5-glibc-2.23
get_lib_base jemalloc LATEST gcc-5-glibc-2.23
get_lib_base numa LATEST gcc-5-glibc-2.23
get_lib_base libunwind LATEST gcc-5-glibc-2.23
get_lib_base tbb LATEST gcc-5-glibc-2.23
get_lib_base kernel-headers fb platform009
get_lib_base kernel-headers 4.0.9-36_fbk5_2933_gd092e3f gcc-5-glibc-2.23
get_lib_base binutils LATEST centos7-native
get_lib_base valgrind LATEST platform009
get_lib_base lua 5.3.4 platform009
get_lib_base valgrind LATEST gcc-5-glibc-2.23
get_lib_base lua 5.2.3 gcc-5-glibc-2.23
git diff $OUTPUT
###########################################################
# 4.8.1 dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_4.8.1.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
echo "Writing 4.8.1 dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.8.1/centos6-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos6-native/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 4.8.1 gcc-4.8.1-glibc-2.17
get_lib_base glibc 2.17 gcc-4.8.1-glibc-2.17
get_lib_base snappy LATEST gcc-4.8.1-glibc-2.17
get_lib_base zlib LATEST gcc-4.8.1-glibc-2.17
get_lib_base bzip2 LATEST gcc-4.8.1-glibc-2.17
get_lib_base lz4 LATEST gcc-4.8.1-glibc-2.17
get_lib_base zstd LATEST gcc-4.8.1-glibc-2.17
get_lib_base gflags LATEST gcc-4.8.1-glibc-2.17
get_lib_base jemalloc LATEST gcc-4.8.1-glibc-2.17
get_lib_base numa LATEST gcc-4.8.1-glibc-2.17
get_lib_base libunwind LATEST gcc-4.8.1-glibc-2.17
get_lib_base tbb 4.0_update2 gcc-4.8.1-glibc-2.17
get_lib_base kernel-headers LATEST gcc-4.8.1-glibc-2.17
get_lib_base binutils LATEST centos6-native
get_lib_base valgrind 3.8.1 gcc-4.8.1-glibc-2.17
get_lib_base lua 5.2.3 centos6-native
git diff $OUTPUT
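get_lib_base derives the name of the variable it sets from the library name: uppercase, hyphens mapped to underscores, plus a _BASE suffix (the two kept lines in the hunk above). A standalone sketch of the conversion:

    lib_name=kernel-headers
    __res_var=${lib_name^^}"_BASE"          # bash uppercase expansion: KERNEL-HEADERS_BASE
    __res_var=`echo $__res_var | tr - _`    # KERNEL_HEADERS_BASE
    echo "$__res_var"                       # variable name under which the path is recorded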

View File

@ -13,7 +13,7 @@ int main() {
return 1;
}
#else
#include "rocksdb/cache_bench_tool.h"
#include <rocksdb/cache_bench_tool.h>
int main(int argc, char** argv) {
return ROCKSDB_NAMESPACE::cache_bench_tool(argc, argv);
}

View File

@ -5,14 +5,11 @@
#ifdef GFLAGS
#include <cinttypes>
#include <cstddef>
#include <cstdio>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include "db/db_impl/db_impl.h"
#include "monitoring/histogram.h"
#include "port/port.h"
#include "rocksdb/cache.h"
@ -21,8 +18,6 @@
#include "rocksdb/env.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table_properties.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/cachable_entry.h"
#include "util/coding.h"
#include "util/gflags_compat.h"
@ -78,57 +73,6 @@ static class std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> secondary_cache;
DEFINE_bool(use_clock_cache, false, "");
// ## BEGIN stress_cache_key sub-tool options ##
// See class StressCacheKey below.
DEFINE_bool(stress_cache_key, false,
"If true, run cache key stress test instead");
DEFINE_uint32(
sck_files_per_day, 2500000,
"(-stress_cache_key) Simulated files generated per simulated day");
// NOTE: Giving each run a specified lifetime, rather than e.g. "until
// first collision" ensures equal skew from start-up, when collisions are
// less likely.
DEFINE_uint32(sck_days_per_run, 90,
"(-stress_cache_key) Number of days to simulate in each run");
// NOTE: The number of observed collisions directly affects the relative
// accuracy of the predicted probabilities. 15 observations should be well
// within factor-of-2 accuracy.
DEFINE_uint32(
sck_min_collision, 15,
"(-stress_cache_key) Keep running until this many collisions seen");
// sck_file_size_mb can be thought of as average file size. The simulation is
// not precise enough to care about the distribution of file sizes; other
// simulations (https://github.com/pdillinger/unique_id/tree/main/monte_carlo)
// indicate the distribution only makes a small difference (e.g. < 2x factor)
DEFINE_uint32(
sck_file_size_mb, 32,
"(-stress_cache_key) Simulated file size in MiB, for accounting purposes");
DEFINE_uint32(sck_reopen_nfiles, 100,
"(-stress_cache_key) Simulate DB re-open average every n files");
DEFINE_uint32(sck_restarts_per_day, 24,
"(-stress_cache_key) Average simulated process restarts per day "
"(across DBs)");
DEFINE_uint32(
sck_db_count, 100,
"(-stress_cache_key) Parallel DBs in simulation sharing a block cache");
DEFINE_uint32(
sck_table_bits, 20,
"(-stress_cache_key) Log2 number of tracked (live) files (across DBs)");
// sck_keep_bits being well below full 128 bits amplifies the collision
// probability so that the true probability can be estimated through observed
// collisions. (More explanation below.)
DEFINE_uint32(
sck_keep_bits, 50,
"(-stress_cache_key) Number of bits to keep from each cache key (<= 64)");
// sck_randomize is used to validate whether cache key is performing "better
// than random." Even with this setting, file offsets are not randomized.
DEFINE_bool(sck_randomize, false,
"(-stress_cache_key) Randomize (hash) cache key");
// See https://github.com/facebook/rocksdb/pull/9058
DEFINE_bool(sck_footer_unique_id, false,
"(-stress_cache_key) Simulate using proposed footer unique id");
// ## END stress_cache_key sub-tool options ##
namespace ROCKSDB_NAMESPACE {
class CacheBench;
@ -276,7 +220,7 @@ class CacheBench {
if (skewed_) {
uint64_t max_key = max_key_;
while (max_key >>= 1) max_log_++;
if (max_key > (static_cast<uint64_t>(1) << max_log_)) max_log_++;
if (max_key > (1u << max_log_)) max_log_++;
}
if (FLAGS_use_clock_cache) {
@ -436,8 +380,7 @@ class CacheBench {
<< "Average key size: "
<< (1.0 * total_key_size / total_entry_count) << "\n"
<< "Average charge: "
<< BytesToHumanString(static_cast<uint64_t>(
1.0 * total_charge / total_entry_count))
<< BytesToHumanString(1.0 * total_charge / total_entry_count)
<< "\n"
<< "Unique deleters: " << deleters.size() << "\n";
*stats_report = ostr.str();
@ -510,9 +453,8 @@ class CacheBench {
timer.Start();
Slice key = gen.GetRand(thread->rnd, max_key_, max_log_);
uint64_t random_op = thread->rnd.Next();
Cache::CreateCallback create_cb = [](const void* buf, size_t size,
void** out_obj,
size_t* charge) -> Status {
Cache::CreateCallback create_cb =
[](void* buf, size_t size, void** out_obj, size_t* charge) -> Status {
*out_obj = reinterpret_cast<void*>(new char[size]);
memcpy(*out_obj, buf, size);
*charge = size;
@ -605,303 +547,9 @@ class CacheBench {
}
};
// cache_bench -stress_cache_key is an independent embedded tool for
// estimating the probability of CacheKey collisions through simulation.
// At a high level, it simulates generating SST files over many months,
// keeping them in the DB and/or cache for some lifetime while staying
// under resource caps, and checking for any cache key collisions that
// arise among the set of live files. For efficient simulation, we make
// some simplifying "pessimistic" assumptions (that only increase the
// chance of the simulation reporting a collision relative to the chance
// of collision in practice):
// * Every generated file has a cache entry for every byte offset in the
// file (contiguous range of cache keys)
// * All of every file is cached for its entire lifetime. (Here "lifetime"
// is technically the union of DB and Cache lifetime, though we only
// model a generous DB lifetime, where space usage is always maximized.
// In a effective Cache, lifetime in cache can only substantially exceed
// lifetime in DB if there is little cache activity; cache activity is
// required to hit cache key collisions.)
//
// It would be possible to track an exact set of cache key ranges for the
// set of live files, but we would have no hope of observing collisions
// (overlap in live files) in our simulation. We need to employ some way
// of amplifying collision probability that allows us to predict the real
// collision probability by extrapolation from observed collisions. Our
// basic approach is to reduce each cache key range down to some smaller
// number of bits, and limiting to bits that are shared over the whole
// range. Now we can observe collisions using a set of smaller stripped-down
// (reduced) cache keys. Let's do some case analysis to understand why this
// works:
// * No collision in reduced key - because the reduction is a pure function
// this implies no collision in the full keys
// * Collision detected between two reduced keys - either
// * The reduction has dropped some structured uniqueness info (from one of
// session counter or file number; file offsets are never materialized here).
// This can only artificially inflate the observed and extrapolated collision
// probabilities. We only have to worry about this in designing the reduction.
// * The reduction has preserved all the structured uniqueness in the cache
// key, which means either
// * REJECTED: We have a uniqueness bug in generating cache keys, where
// structured uniqueness info should have been different but isn't. In such a
// case, increasing by 1 the number of bits kept after reduction would not
// reduce observed probabilities by half. (In our observations, the
// probabilities are reduced approximately by half.)
// * ACCEPTED: The lost unstructured uniqueness in the key determines the
// probability that an observed collision would imply an overlap in ranges.
// In short, dropping n bits from key would increase collision probability by
// 2**n, assuming those n bits have full entropy in unstructured uniqueness.
//
// But we also have to account for the key ranges based on file size. If file
// sizes are roughly 2**b offsets, using XOR in 128-bit cache keys for
// "ranges", we know from other simulations (see
// https://github.com/pdillinger/unique_id/) that that's roughly equivalent to
// (less than 2x higher collision probability) using a cache key of size
// 128 - b bits for the whole file. (This is the only place we make an
// "optimistic" assumption, which is more than offset by the real
// implementation stripping off 2 lower bits from block byte offsets for cache
// keys. The simulation assumes byte offsets, which is net pessimistic.)
//
// So to accept the extrapolation as valid, we need to be confident that all
// "lost" bits, excluding those covered by file offset, are full entropy.
// Recall that we have assumed (verifiably, safely) that other structured data
// (file number and session counter) are kept, not lost. Based on the
// implementation comments for OffsetableCacheKey, the only potential hole here
// is that we only have ~103 bits of entropy in "all new" session IDs, and in
// extreme cases, there might be only 1 DB ID. However, because the upper ~39
// bits of session ID are hashed, the combination of file number and file
// offset only has to add to 25 bits (or more) to ensure full entropy in
// unstructured uniqueness lost in the reduction. Typical file size of 32MB
// suffices (at least for simulation purposes where we assume each file offset
// occupies a cache key).
//
// Example results in comments on OffsetableCacheKey.
class StressCacheKey {
public:
void Run() {
if (FLAGS_sck_footer_unique_id) {
// Proposed footer unique IDs are DB-independent and session-independent
// (but process-dependent) which is most easily simulated here by
// assuming 1 DB and (later below) no session resets without process
// reset.
FLAGS_sck_db_count = 1;
}
// Describe the simulated workload
uint64_t mb_per_day =
uint64_t{FLAGS_sck_files_per_day} * FLAGS_sck_file_size_mb;
printf("Total cache or DBs size: %gTiB Writing %g MiB/s or %gTiB/day\n",
FLAGS_sck_file_size_mb / 1024.0 / 1024.0 *
std::pow(2.0, FLAGS_sck_table_bits),
mb_per_day / 86400.0, mb_per_day / 1024.0 / 1024.0);
// For extrapolating probability of any collisions from a number of
// observed collisions
multiplier_ = std::pow(2.0, 128 - FLAGS_sck_keep_bits) /
(FLAGS_sck_file_size_mb * 1024.0 * 1024.0);
printf(
"Multiply by %g to correct for simulation losses (but still assume "
"whole file cached)\n",
multiplier_);
restart_nfiles_ = FLAGS_sck_files_per_day / FLAGS_sck_restarts_per_day;
double without_ejection =
std::pow(1.414214, FLAGS_sck_keep_bits) / FLAGS_sck_files_per_day;
// This should be a lower bound for -sck_randomize, usually a terribly
// rough lower bound.
// If observation is worse than this, then something has gone wrong.
printf(
"Without ejection, expect random collision after %g days (%g "
"corrected)\n",
without_ejection, without_ejection * multiplier_);
double with_full_table =
std::pow(2.0, FLAGS_sck_keep_bits - FLAGS_sck_table_bits) /
FLAGS_sck_files_per_day;
// This is an alternate lower bound for -sck_randomize, usually pretty
// accurate. Our cache keys should usually perform "better than random"
// but always no worse. (If observation is substantially worse than this,
// then something has gone wrong.)
printf(
"With ejection and full table, expect random collision after %g "
"days (%g corrected)\n",
with_full_table, with_full_table * multiplier_);
collisions_ = 0;
// Run until sufficient number of observed collisions.
for (int i = 1; collisions_ < FLAGS_sck_min_collision; i++) {
RunOnce();
if (collisions_ == 0) {
printf(
"No collisions after %d x %u days "
" \n",
i, FLAGS_sck_days_per_run);
} else {
double est = 1.0 * i * FLAGS_sck_days_per_run / collisions_;
printf("%" PRIu64
" collisions after %d x %u days, est %g days between (%g "
"corrected) \n",
collisions_, i, FLAGS_sck_days_per_run, est, est * multiplier_);
}
}
}
void RunOnce() {
// Re-initialized simulated state
const size_t db_count = FLAGS_sck_db_count;
dbs_.reset(new TableProperties[db_count]{});
const size_t table_mask = (size_t{1} << FLAGS_sck_table_bits) - 1;
table_.reset(new uint64_t[table_mask + 1]{});
if (FLAGS_sck_keep_bits > 64) {
FLAGS_sck_keep_bits = 64;
}
// Details of which bits are dropped in reduction
uint32_t shift_away = 64 - FLAGS_sck_keep_bits;
// Shift away fewer potential file number bits (b) than potential
// session counter bits (a).
uint32_t shift_away_b = shift_away / 3;
uint32_t shift_away_a = shift_away - shift_away_b;
process_count_ = 0;
session_count_ = 0;
ResetProcess();
Random64 r{std::random_device{}()};
uint64_t max_file_count =
uint64_t{FLAGS_sck_files_per_day} * FLAGS_sck_days_per_run;
uint64_t file_size = FLAGS_sck_file_size_mb * uint64_t{1024} * 1024U;
uint32_t report_count = 0;
uint32_t collisions_this_run = 0;
size_t db_i = 0;
for (uint64_t file_count = 1; file_count <= max_file_count;
++file_count, ++db_i) {
// Round-robin through DBs (this is faster than %)
if (db_i >= db_count) {
db_i = 0;
}
// Any other periodic actions before simulating next file
if (!FLAGS_sck_footer_unique_id && r.OneIn(FLAGS_sck_reopen_nfiles)) {
ResetSession(db_i);
} else if (r.OneIn(restart_nfiles_)) {
ResetProcess();
}
// Simulate next file
OffsetableCacheKey ock;
dbs_[db_i].orig_file_number += 1;
// Skip some file numbers to account for other file kinds. (Not in footer
// unique ID mode, where orig_file_number tracks the process-wide count of
// generated SST files.)
if (!FLAGS_sck_footer_unique_id) {
dbs_[db_i].orig_file_number += (r.Next() & 3);
}
bool is_stable;
BlockBasedTable::SetupBaseCacheKey(&dbs_[db_i], /* ignored */ "",
/* ignored */ 42, file_size, &ock,
&is_stable);
assert(is_stable);
// Get a representative cache key, which later we analytically generalize
// to a range.
CacheKey ck = ock.WithOffset(0);
uint64_t reduced_key;
if (FLAGS_sck_randomize) {
reduced_key = GetSliceHash64(ck.AsSlice()) >> shift_away;
} else if (FLAGS_sck_footer_unique_id) {
// Special case: keep only file number, not session counter
uint32_t a = DecodeFixed32(ck.AsSlice().data() + 4) >> shift_away_a;
uint32_t b = DecodeFixed32(ck.AsSlice().data() + 12) >> shift_away_b;
reduced_key = (uint64_t{a} << 32) + b;
} else {
// Try to keep file number and session counter (shift away other bits)
uint32_t a = DecodeFixed32(ck.AsSlice().data()) << shift_away_a;
uint32_t b = DecodeFixed32(ck.AsSlice().data() + 12) >> shift_away_b;
reduced_key = (uint64_t{a} << 32) + b;
}
if (reduced_key == 0) {
// Unlikely, but we need to exclude tracking this value because we
// use it to mean "empty" in table. This case is OK as long as we
// don't hit it often.
printf("Hit Zero! \n");
file_count--;
continue;
}
uint64_t h =
NPHash64(reinterpret_cast<char*>(&reduced_key), sizeof(reduced_key));
// Skew expected lifetimes, for high (super-Poisson) variance
// in actual lifetimes.
size_t pos =
std::min(Lower32of64(h) & table_mask, Upper32of64(h) & table_mask);
if (table_[pos] == reduced_key) {
collisions_this_run++;
// Our goal is to predict probability of no collisions, not expected
// number of collisions. To make the distinction, we have to get rid
// of observing correlated collisions, which this takes care of:
ResetProcess();
} else {
// Replace (end of lifetime for file that was in this slot)
table_[pos] = reduced_key;
}
if (++report_count == FLAGS_sck_files_per_day) {
report_count = 0;
// Estimate fill %
size_t incr = table_mask / 1000;
size_t sampled_count = 0;
for (size_t i = 0; i <= table_mask; i += incr) {
if (table_[i] != 0) {
sampled_count++;
}
}
// Report
printf(
"%" PRIu64 " days, %" PRIu64 " proc, %" PRIu64
" sess, %u coll, occ %g%%, ejected %g%% \r",
file_count / FLAGS_sck_files_per_day, process_count_,
session_count_, collisions_this_run, 100.0 * sampled_count / 1000.0,
100.0 * (1.0 - sampled_count / 1000.0 * table_mask / file_count));
fflush(stdout);
}
}
collisions_ += collisions_this_run;
}
void ResetSession(size_t i) {
dbs_[i].db_session_id = DBImpl::GenerateDbSessionId(nullptr);
session_count_++;
}
void ResetProcess() {
process_count_++;
DBImpl::TEST_ResetDbSessionIdGen();
for (size_t i = 0; i < FLAGS_sck_db_count; ++i) {
ResetSession(i);
}
if (FLAGS_sck_footer_unique_id) {
// For footer unique ID, this tracks process-wide generated SST file
// count.
dbs_[0].orig_file_number = 0;
}
}
private:
// Use db_session_id and orig_file_number from TableProperties
std::unique_ptr<TableProperties[]> dbs_;
std::unique_ptr<uint64_t[]> table_;
uint64_t process_count_ = 0;
uint64_t session_count_ = 0;
uint64_t collisions_ = 0;
uint32_t restart_nfiles_ = 0;
double multiplier_ = 0.0;
};
int cache_bench_tool(int argc, char** argv) {
ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_stress_cache_key) {
// Alternate tool
StressCacheKey().Run();
return 0;
}
if (FLAGS_threads <= 0) {
fprintf(stderr, "threads number must be > 0\n");
exit(1);
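A minimal sketch (not part of cache_bench_tool.cc) of the "corrected" collision estimate the tool prints: with -sck_keep_bits=K and files spanning about 2^F contiguous keyable offsets, the simulation is roughly 2^(128-K-F) times more collision-prone than reality, so observed collision rates are scaled by that factor.

#include <cmath>
#include <cstdio>

int main() {
  const int keep_bits = 40;    // -sck_keep_bits
  const int offset_bits = 25;  // 32 MiB file => ~2^25 contiguous offsets
  // 2^(128-40-25) == 2^63 ~= 9.22337e+18, matching the tool's
  // "Multiply by 9.22337e+18 to correct for simulation losses" output.
  std::printf("correction: %g\n",
              std::pow(2.0, 128 - keep_bits - offset_bits));
  return 0;
}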


@@ -11,7 +11,7 @@
namespace ROCKSDB_NAMESPACE {
std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
std::array<const char*, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
"DataBlock",
"FilterBlock",
"FilterMetaBlock",
@@ -19,13 +19,10 @@ std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
"IndexBlock",
"OtherBlock",
"WriteBuffer",
"CompressionDictionaryBuildingBuffer",
"FilterConstruction",
"BlockBasedTableReader",
"Misc",
}};
std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
std::array<const char*, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
"data-block",
"filter-block",
"filter-meta-block",
@@ -33,78 +30,19 @@ std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
"index-block",
"other-block",
"write-buffer",
"compression-dictionary-building-buffer",
"filter-construction",
"block-based-table-reader",
"misc",
}};
const std::string& GetCacheEntryRoleName(CacheEntryRole role) {
return kCacheEntryRoleToHyphenString[static_cast<size_t>(role)];
}
const std::string& BlockCacheEntryStatsMapKeys::CacheId() {
static const std::string kCacheId = "id";
return kCacheId;
}
const std::string& BlockCacheEntryStatsMapKeys::CacheCapacityBytes() {
static const std::string kCacheCapacityBytes = "capacity";
return kCacheCapacityBytes;
}
const std::string&
BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds() {
static const std::string kLastCollectionDurationSeconds =
"secs_for_last_collection";
return kLastCollectionDurationSeconds;
}
const std::string& BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds() {
static const std::string kLastCollectionAgeSeconds =
"secs_since_last_collection";
return kLastCollectionAgeSeconds;
}
namespace {
std::string GetPrefixedCacheEntryRoleName(const std::string& prefix,
CacheEntryRole role) {
const std::string& role_name = GetCacheEntryRoleName(role);
std::string prefixed_role_name;
prefixed_role_name.reserve(prefix.size() + role_name.size());
prefixed_role_name.append(prefix);
prefixed_role_name.append(role_name);
return prefixed_role_name;
}
} // namespace
std::string BlockCacheEntryStatsMapKeys::EntryCount(CacheEntryRole role) {
const static std::string kPrefix = "count.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
std::string BlockCacheEntryStatsMapKeys::UsedBytes(CacheEntryRole role) {
const static std::string kPrefix = "bytes.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
std::string BlockCacheEntryStatsMapKeys::UsedPercent(CacheEntryRole role) {
const static std::string kPrefix = "percent.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
namespace {
struct Registry {
std::mutex mutex;
UnorderedMap<Cache::DeleterFn, CacheEntryRole> role_map;
std::unordered_map<Cache::DeleterFn, CacheEntryRole> role_map;
void Register(Cache::DeleterFn fn, CacheEntryRole role) {
std::lock_guard<std::mutex> lock(mutex);
role_map[fn] = role;
}
UnorderedMap<Cache::DeleterFn, CacheEntryRole> Copy() {
std::unordered_map<Cache::DeleterFn, CacheEntryRole> Copy() {
std::lock_guard<std::mutex> lock(mutex);
return role_map;
}
@@ -121,7 +59,7 @@ void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role) {
GetRegistry().Register(fn, role);
}
UnorderedMap<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap() {
std::unordered_map<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap() {
return GetRegistry().Copy();
}


@@ -7,17 +7,38 @@
#include <array>
#include <cstdint>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include "rocksdb/cache.h"
#include "util/hash_containers.h"
namespace ROCKSDB_NAMESPACE {
extern std::array<std::string, kNumCacheEntryRoles>
// Classifications of block cache entries, for reporting statistics
enum class CacheEntryRole {
// Block-based table data block
kDataBlock,
// Block-based table filter block (full or partitioned)
kFilterBlock,
// Block-based table metadata block for partitioned filter
kFilterMetaBlock,
// Block-based table deprecated filter block (old "block-based" filter)
kDeprecatedFilterBlock,
// Block-based table index block
kIndexBlock,
// Other kinds of block-based table block
kOtherBlock,
// WriteBufferManager reservations to account for memtable usage
kWriteBuffer,
// Default bucket, for miscellaneous cache entries. Do not use for
// entries that could potentially add up to large usage.
kMisc,
};
constexpr uint32_t kNumCacheEntryRoles =
static_cast<uint32_t>(CacheEntryRole::kMisc) + 1;
extern std::array<const char*, kNumCacheEntryRoles>
kCacheEntryRoleToCamelString;
extern std::array<std::string, kNumCacheEntryRoles>
extern std::array<const char*, kNumCacheEntryRoles>
kCacheEntryRoleToHyphenString;
// To associate cache entries with their role, we use a hack on the
@@ -44,7 +65,7 @@ void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role);
// * This is suitable for preparing for batch operations, like with
// CacheEntryStatsCollector.
// * The number of mappings should be sufficiently small (dozens).
UnorderedMap<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap();
std::unordered_map<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap();
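// A sketch of the batch pattern described above (hypothetical caller code;
// assumes Cache::ApplyToAllEntries as in recent versions, and an assumed
// usage_by_role array of size kNumCacheEntryRoles):
//   auto role_map = CopyCacheDeleterRoleMap();  // one copy, then no locking
//   cache->ApplyToAllEntries(
//       [&](const Slice& /*key*/, void* /*value*/, size_t charge,
//           Cache::DeleterFn deleter) {
//         auto it = role_map.find(deleter);
//         CacheEntryRole role =
//             (it == role_map.end()) ? CacheEntryRole::kMisc : it->second;
//         usage_by_role[static_cast<size_t>(role)] += charge;
//       },
//       {});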
// ************************************************************** //
// An automatic registration infrastructure. This enables code
@@ -64,9 +85,7 @@ struct RegisteredDeleter {
// These have global linkage to help ensure compiler optimizations do not
// break uniqueness for each <T,R>
static void Delete(const Slice& /* key */, void* value) {
// Supports T == Something[], unlike delete operator
std::default_delete<T>()(
static_cast<typename std::remove_extent<T>::type*>(value));
delete static_cast<T*>(value);
}
};
@@ -74,9 +93,9 @@ template <CacheEntryRole R>
struct RegisteredNoopDeleter {
RegisteredNoopDeleter() { RegisterCacheDeleterRole(Delete, R); }
static void Delete(const Slice& /* key */, void* /* value */) {
// Here was `assert(value == nullptr);` but we can also put pointers
// to static data in Cache, for testing at least.
static void Delete(const Slice& /* key */, void* value) {
(void)value;
assert(value == nullptr);
}
};
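// Hypothetical usage sketch (not in this header): a static RegisteredDeleter
// registers its Delete fn under the given role when constructed, and passing
// that same fn to Cache::Insert lets the entry be classified later via
// CopyCacheDeleterRoleMap(). MyBlock is a placeholder type:
//   static RegisteredDeleter<MyBlock, CacheEntryRole::kDataBlock> deleter;
//   Status s = cache->Insert(key, my_block, charge, deleter.Delete);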


@@ -11,12 +11,10 @@
#include <mutex>
#include "cache/cache_helpers.h"
#include "cache/cache_key.h"
#include "port/lang.h"
#include "rocksdb/cache.h"
#include "rocksdb/status.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "util/coding_lean.h"
namespace ROCKSDB_NAMESPACE {
@@ -113,7 +111,13 @@ class CacheEntryStatsCollector {
// entry in cache until all refs are destroyed.
static Status GetShared(Cache *cache, SystemClock *clock,
std::shared_ptr<CacheEntryStatsCollector> *ptr) {
const Slice &cache_key = GetCacheKey();
std::array<uint64_t, 3> cache_key_data{
{// First 16 bytes == md5 of class name
0x7eba5a8fb5437c90U, 0x8ca68c9b11655855U,
// Last 8 bytes based on a function pointer to make unique for each
// template instantiation
reinterpret_cast<uint64_t>(&CacheEntryStatsCollector::GetShared)}};
Slice cache_key = GetSlice(&cache_key_data);
Cache::Handle *h = cache->Lookup(cache_key);
if (h == nullptr) {
@@ -161,13 +165,6 @@ class CacheEntryStatsCollector {
delete static_cast<CacheEntryStatsCollector *>(value);
}
static const Slice &GetCacheKey() {
// For each template instantiation
static CacheKey ckey = CacheKey::CreateUniqueForProcessLifetime();
static Slice ckey_slice = ckey.AsSlice();
return ckey_slice;
}
std::mutex saved_mutex_;
Stats saved_stats_;
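// Illustrative call pattern (a sketch, not part of this header): DB
// instances sharing one block cache converge on a single collector
// through the cache itself; MyStats is a placeholder stats type:
//   std::shared_ptr<CacheEntryStatsCollector<MyStats>> collector;
//   Status s = CacheEntryStatsCollector<MyStats>::GetShared(
//       cache.get(), SystemClock::Default().get(), &collector);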

cache/cache_key.cc

@@ -1,344 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/cache_key.h"
#include <algorithm>
#include <atomic>
#include "rocksdb/cache.h"
#include "table/unique_id_impl.h"
#include "util/hash.h"
#include "util/math.h"
namespace ROCKSDB_NAMESPACE {
// Value space plan for CacheKey:
//
// session_etc64_ | offset_etc64_ | Only generated by
// ---------------+---------------+------------------------------------------
// 0 | 0 | Reserved for "empty" CacheKey()
// 0 | > 0, < 1<<63 | CreateUniqueForCacheLifetime
// 0 | >= 1<<63 | CreateUniqueForProcessLifetime
// > 0 | any | OffsetableCacheKey.WithOffset
CacheKey CacheKey::CreateUniqueForCacheLifetime(Cache *cache) {
// +1 so that we can reserve all zeros for "unset" cache key
uint64_t id = cache->NewId() + 1;
// Ensure we don't collide with CreateUniqueForProcessLifetime
assert((id >> 63) == 0U);
return CacheKey(0, id);
}
CacheKey CacheKey::CreateUniqueForProcessLifetime() {
// To avoid colliding with CreateUniqueForCacheLifetime, assuming
// Cache::NewId counts up from zero, here we count down from UINT64_MAX.
// If this ever becomes a point of contention, we could sub-divide the
// space and use CoreLocalArray.
static std::atomic<uint64_t> counter{UINT64_MAX};
uint64_t id = counter.fetch_sub(1, std::memory_order_relaxed);
// Ensure we don't collide with CreateUniqueForCacheLifetime
assert((id >> 63) == 1U);
return CacheKey(0, id);
}
// Value plan for CacheKeys from OffsetableCacheKey, assuming that
// db_session_ids are generated from a base_session_id and
// session_id_counter (by SemiStructuredUniqueIdGen+EncodeSessionId
// in DBImpl::GenerateDbSessionId):
//
// Conceptual inputs:
// db_id (unstructured, from GenerateRawUniqueId or equiv)
// * could be shared between cloned DBs but rare
// * could be constant, if session id suffices
// base_session_id (unstructured, from GenerateRawUniqueId)
// session_id_counter (structured)
// * usually much smaller than 2**24
// file_number (structured)
// * usually smaller than 2**24
// offset_in_file (structured, might skip lots of values)
// * usually smaller than 2**32
// max_offset determines placement of file_number to prevent
// overlapping with offset
//
// Outputs come from bitwise-xor of the constituent pieces, low bits on left:
//
// |------------------------- session_etc64 -------------------------|
// | +++++++++++++++ base_session_id (lower 64 bits) +++++++++++++++ |
// |-----------------------------------------------------------------|
// | session_id_counter ...| |
// |-----------------------------------------------------------------|
// | | ... file_number |
// | | overflow & meta |
// |-----------------------------------------------------------------|
//
//
// |------------------------- offset_etc64 --------------------------|
// | hash of: ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
// | * base_session_id (upper ~39 bits) |
// | * db_id (~122 bits entropy) |
// |-----------------------------------------------------------------|
// | offset_in_file ............... | |
// |-----------------------------------------------------------------|
// | | file_number, 0-3 |
// | | lower bytes |
// |-----------------------------------------------------------------|
//
// Based on max_offset, a maximal number of bytes 0..3 is chosen for
// including from lower bits of file_number in offset_etc64. The choice
// is encoded in two bits of metadata going into session_etc64, though
// the common case of 3 bytes is encoded as 0 so that session_etc64
// is unmodified by file_number concerns in the common case.
//
// There is nothing preventing "file number overflow & meta" from meeting
// and overlapping with session_id_counter, but reaching such a case requires
// an intractable combination of large file offsets (thus at least some large
// files), large file numbers (thus large number of files generated), and
// large number of session IDs generated in a single process. A trillion each
// (2**40) of session ids, offsets, and file numbers comes to 120 bits.
// With two bits of metadata and byte granularity, this is on the verge of
// overlap, but even in the overlap case, it doesn't seem likely that
// a file from billions of files or session ids ago will still be live
// or cached.
//
// In fact, if our SST files are all < 4TB (see
// BlockBasedTable::kMaxFileSizeStandardEncoding), then SST files generated
// in a single process are guaranteed to have unique cache keys, unless/until
// number session ids * max file number = 2**86, e.g. 1 trillion DB::Open in
// a single process and 64 trillion files generated. Even at that point, to
// see a collision we would need a miraculous re-synchronization of session
// id and file number, along with a live file or stale cache entry from
// trillions of files ago.
//
// How https://github.com/pdillinger/unique_id applies here:
// Every bit of output always includes "unstructured" uniqueness bits and
// often combines with "structured" uniqueness bits. The "unstructured" bits
// change infrequently: only when we cannot guarantee our state tracking for
// "structured" uniqueness hasn't been cloned. Using a static
// SemiStructuredUniqueIdGen for db_session_ids, this means we only get an
// "all new" session id when a new process uses RocksDB. (Between processes,
// we don't know if a DB or other persistent storage has been cloned. We
// assume that if VM hot cloning is used, subsequently generated SST files
// do not interact.) Within a process, only the session_lower of the
// db_session_id changes incrementally ("structured" uniqueness).
//
// This basically means that our offsets, counters and file numbers allow us
// to do somewhat "better than random" (birthday paradox) while in the
// degenerate case of completely new session for each tiny file, we still
// have strong uniqueness properties from the birthday paradox, with ~103
// bit session IDs or up to 128 bits entropy with different DB IDs sharing a
// cache.
//
// More collision probability analysis:
// Suppose a RocksDB host generates (generously) 2 GB/s (10TB data, 17 DWPD)
// with average process/session lifetime of (pessimistically) 4 minutes.
// In 180 days (generous allowable data lifespan), we generate 31 million GB
// of data, or 2^55 bytes, and 2^16 "all new" session IDs.
//
// First, suppose this is in a single DB (lifetime 180 days):
// 128 bits cache key size
// - 55 <- ideal size for byte offsets + file numbers
// - 2 <- bits for offsets and file numbers not exactly powers of two
// - 2 <- bits for file number encoding metadata
// + 2 <- bits saved not using byte offsets in BlockBasedTable::GetCacheKey
// ----
// 71 <- bits remaining for distinguishing session IDs
// The probability of a collision in 71 bits of session ID data is less than
// 1 in 2**(71 - (2 * 16)), or roughly 1 in a trillion. And this assumes all
// data from the last 180 days is in cache for potential collision, and that
// cache keys under each session id exhaustively cover the remaining 57 bits
// while in reality they'll only cover a small fraction of it.
//
// Although data could be transferred between hosts, each host has its own
// cache and we are already assuming a high rate of "all new" session ids.
// So this doesn't really change the collision calculation. Across a fleet
// of 1 million, each with <1 in a trillion collision possibility,
// fleetwide collision probability is <1 in a million.
//
// Now suppose we have many DBs per host, say 2**10, with same host-wide write
// rate and process/session lifetime. File numbers will be ~10 bits smaller
// and we will have 2**10 times as many session IDs because of simultaneous
// lifetimes. So now collision chance is less than 1 in 2**(81 - (2 * 26)),
// or roughly 1 in a billion.
//
// Suppose instead we generated random or hashed cache keys for each
// (compressed) block. For 1KB compressed block size, that is 2^45 cache keys
// in 180 days. Collision probability is more easily estimated at roughly
// 1 in 2**(128 - (2 * 45)) or roughly 1 in a trillion (assuming all
// data from the last 180 days is in cache, but NOT the other assumption
// for the 1 in a trillion estimate above).
//
//
// Collision probability estimation through simulation:
// A tool ./cache_bench -stress_cache_key broadly simulates host-wide cache
// activity over many months, by making some pessimistic simplifying
// assumptions. See class StressCacheKey in cache_bench_tool.cc for details.
// Here is some sample output with
// `./cache_bench -stress_cache_key -sck_keep_bits=40`:
//
// Total cache or DBs size: 32TiB Writing 925.926 MiB/s or 76.2939TiB/day
// Multiply by 9.22337e+18 to correct for simulation losses (but still
// assume whole file cached)
//
// These come from default settings of 2.5M files per day of 32 MB each, and
// `-sck_keep_bits=40` means that to represent a single file, we are only
// keeping 40 bits of the 128-bit (base) cache key. With file size of 2**25
// contiguous keys (pessimistic), our simulation is about 2**(128-40-25) or
// about 9 billion billion times more prone to collision than reality.
//
// More default assumptions, relatively pessimistic:
// * 100 DBs in same process (doesn't matter much)
// * Re-open DB in same process (new session ID related to old session ID) on
// average every 100 files generated
// * Restart process (all new session IDs unrelated to old) 24 times per day
//
// After enough data, we get a result at the end (-sck_keep_bits=40):
//
// (keep 40 bits) 17 collisions after 2 x 90 days, est 10.5882 days between
// (9.76592e+19 corrected)
//
// If we believe the (pessimistic) simulation and the mathematical
// extrapolation, we would need to run a billion machines all for 97 billion
// days to expect a cache key collision. To help verify that our extrapolation
// ("corrected") is robust, we can make our simulation more precise with
// `-sck_keep_bits=41` and `42`, which takes more running time to get enough
// collision data:
//
// (keep 41 bits) 16 collisions after 4 x 90 days, est 22.5 days between
// (1.03763e+20 corrected)
// (keep 42 bits) 19 collisions after 10 x 90 days, est 47.3684 days between
// (1.09224e+20 corrected)
//
// The extrapolated prediction is very close. If anything, we might have some
// very small losses of structured data (see class StressCacheKey in
// cache_bench_tool.cc) leading to more accurate & more attractive prediction
// with more bits kept.
//
// With the `-sck_randomize` option, we can see that typical workloads like
// above have lower collision probability than "random" cache keys (note:
// offsets still non-randomized) by a modest amount (roughly 20x less collision
// prone than random), which should make us reasonably comfortable even in
// "degenerate" cases (e.g. repeatedly launch a process to generate 1 file
// with SstFileWriter):
//
// (rand 40 bits) 197 collisions after 1 x 90 days, est 0.456853 days between
// (4.21372e+18 corrected)
//
// We can see that with more frequent process restarts (all new session IDs),
// we get closer to the "random" cache key performance:
//
// (-sck_restarts_per_day=5000): 140 collisions after 1 x 90 days, ...
// (5.92931e+18 corrected)
//
// Other tests have been run to validate other conditions behave as expected,
// never behaving "worse than random" unless we start chopping off structured
// data.
//
//
// Conclusion: Even in extreme cases, rapidly burning through "all new" IDs
// that only arise when a new process is started, the chance of any cache key
// collisions in a giant fleet of machines is negligible. Especially when
// processes live for hours or days, the chance of a cache key collision is
// likely more plausibly due to bad hardware than to bad luck in random
// session ID data. Software defects are surely more likely to cause corruption
// than both of those.
//
// TODO: Nevertheless / regardless, an efficient way to detect (and thus
// quantify) block cache corruptions, including collisions, should be added.
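// Worked example (illustrative values, same definitions as the constructor
// below): for a 4 GiB file, max_offset = 1ull << 32, so
// FloorLog2(max_offset | 0x100000000U) == 32 and
// file_number_bytes_in_offset_etc == (63 - 32) / 8 == 3. The 3 low bytes
// of file_number are bit-reversed into offset_etc64_, and the encoded
// metadata (3 - 3 == 0) leaves session_etc64_ unmodified, which is the
// common case noted above.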
OffsetableCacheKey::OffsetableCacheKey(const std::string &db_id,
const std::string &db_session_id,
uint64_t file_number,
uint64_t max_offset) {
#ifndef NDEBUG
max_offset_ = max_offset;
#endif
// Closely related to GetSstInternalUniqueId, but only need 128 bits and
// need to include an offset within the file.
// See also https://github.com/pdillinger/unique_id for background.
uint64_t session_upper = 0; // Assignment to appease clang-analyze
uint64_t session_lower = 0; // Assignment to appease clang-analyze
{
Status s = DecodeSessionId(db_session_id, &session_upper, &session_lower);
if (!s.ok()) {
// A reasonable fallback in case malformed
Hash2x64(db_session_id.data(), db_session_id.size(), &session_upper,
&session_lower);
}
}
// Hash the session upper (~39 bits entropy) and DB id (120+ bits entropy)
// for more global uniqueness entropy.
// (It is possible that many DBs descended from one common DB id are copied
// around and proliferate, in which case session id is critical, but it is
// more common for different DBs to have different DB ids.)
uint64_t db_hash = Hash64(db_id.data(), db_id.size(), session_upper);
// This establishes the db+session id part of the cache key.
//
// Exactly preserve (in common cases; see modifiers below) session lower to
// ensure that session ids generated during the same process lifetime are
// guaranteed unique.
//
// We put this first for CommonPrefixSlice(), so that a small-ish set of
// cache key prefixes suffices to cover entries relevant to any DB.
session_etc64_ = session_lower;
// This provides extra entropy in case of different DB id or process
// generating a session id, but is also partly/variably obscured by
// file_number and offset (see below).
offset_etc64_ = db_hash;
// Into offset_etc64_ we are (eventually) going to pack & xor in an offset and
// a file_number, but we might need the file_number to overflow into
// session_etc64_. (There must only be one session_etc64_ value per
// file, and preferably shared among many files.)
//
// Figure out how many bytes of file_number we are going to be able to
// pack in with max_offset, though our encoding will only support packing
// in up to 3 bytes of file_number. (16M file numbers is enough for a new
// file number every second for half a year.)
int file_number_bytes_in_offset_etc =
(63 - FloorLog2(max_offset | 0x100000000U)) / 8;
int file_number_bits_in_offset_etc = file_number_bytes_in_offset_etc * 8;
// Assert two bits of metadata
assert(file_number_bytes_in_offset_etc >= 0 &&
file_number_bytes_in_offset_etc <= 3);
// Assert we couldn't have used a larger allowed number of bytes (shift
// would chop off bytes).
assert(file_number_bytes_in_offset_etc == 3 ||
(max_offset << (file_number_bits_in_offset_etc + 8) >>
(file_number_bits_in_offset_etc + 8)) != max_offset);
uint64_t mask = (uint64_t{1} << (file_number_bits_in_offset_etc)) - 1;
// Pack into high bits of etc so that offset can go in low bits of etc
// TODO: could be EndianSwapValue?
uint64_t offset_etc_modifier = ReverseBits(file_number & mask);
assert(offset_etc_modifier << file_number_bits_in_offset_etc == 0U);
// Overflow and (3 - byte count), likely both zero, go into the session_id part
uint64_t session_etc_modifier =
(file_number >> file_number_bits_in_offset_etc << 2) |
static_cast<uint64_t>(3 - file_number_bytes_in_offset_etc);
// Packed into high bits to minimize interference with session id counter.
session_etc_modifier = ReverseBits(session_etc_modifier);
// Assert session_id part is only modified in extreme cases
assert(session_etc_modifier == 0 || file_number > /*3 bytes*/ 0xffffffU ||
max_offset > /*5 bytes*/ 0xffffffffffU);
// Xor in the modifiers
session_etc64_ ^= session_etc_modifier;
offset_etc64_ ^= offset_etc_modifier;
// Although DBImpl guarantees (in recent versions) that session_lower is not
// zero, that's not entirely sufficient to guarantee that session_etc64_ is
// not zero (so that the 0 case can be used by CacheKey::CreateUnique*)
if (session_etc64_ == 0U) {
session_etc64_ = session_upper | 1U;
}
assert(session_etc64_ != 0);
}
} // namespace ROCKSDB_NAMESPACE

cache/cache_key.h

@@ -1,132 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cstdint>
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/slice.h"
namespace ROCKSDB_NAMESPACE {
class Cache;
// A standard holder for fixed-size block cache keys (and for related caches).
// They are created through one of these, each using its own range of values:
// * CacheKey::CreateUniqueForCacheLifetime
// * CacheKey::CreateUniqueForProcessLifetime
// * Default ctor ("empty" cache key)
// * OffsetableCacheKey->WithOffset
//
// The first two use atomic counters to guarantee uniqueness over the given
// lifetime and the last uses a form of universally unique identifier for
// uniqueness with very high probability (and guaranteed for files generated
// during a single process lifetime).
//
// CacheKeys are currently used by calling AsSlice() to pass as a key to
// Cache. For performance, the keys are endianness-dependent (though otherwise
// portable). (Persistable cache entries are not intended to cross platforms.)
class CacheKey {
public:
// For convenience, constructs an "empty" cache key that is never returned
// by other means.
inline CacheKey() : session_etc64_(), offset_etc64_() {}
inline bool IsEmpty() const {
return (session_etc64_ == 0) & (offset_etc64_ == 0);
}
// Use this cache key as a Slice (byte order is endianness-dependent)
inline Slice AsSlice() const {
static_assert(sizeof(*this) == 16, "Standardized on 16-byte cache key");
assert(!IsEmpty());
return Slice(reinterpret_cast<const char *>(this), sizeof(*this));
}
// Create a CacheKey that is unique among others associated with this Cache
// instance. Depends on Cache::NewId. This is useful for block cache
// "reservations".
static CacheKey CreateUniqueForCacheLifetime(Cache *cache);
// Create a CacheKey that is unique among others for the lifetime of this
// process. This is useful for saving in a static data member so that
// different DB instances can agree on a cache key for shared entities,
// such as for CacheEntryStatsCollector.
static CacheKey CreateUniqueForProcessLifetime();
protected:
friend class OffsetableCacheKey;
CacheKey(uint64_t session_etc64, uint64_t offset_etc64)
: session_etc64_(session_etc64), offset_etc64_(offset_etc64) {}
uint64_t session_etc64_;
uint64_t offset_etc64_;
};
// A file-specific generator of cache keys, sometimes referred to as the
// "base" cache key for a file because all the cache keys for various offsets
// within the file are computed using simple arithmetic. The basis for the
// general approach is discussed here: https://github.com/pdillinger/unique_id
// Heavily related to GetUniqueIdFromTableProperties.
//
// If the db_id, db_session_id, and file_number come from the file's table
// properties, then the keys will be stable across DB::Open/Close, backup/
// restore, import/export, etc.
//
// This class "is a" CacheKey only privately so that it is not misused as
// a ready-to-use CacheKey.
class OffsetableCacheKey : private CacheKey {
public:
// For convenience, constructs an "empty" cache key that should not be used.
inline OffsetableCacheKey() : CacheKey() {}
// Constructs an OffsetableCacheKey with the given information about a file.
// max_offset is based on file size (see WithOffset) and is required here to
// choose an appropriate (sub-)encoding. This constructor never generates an
// "empty" base key.
OffsetableCacheKey(const std::string &db_id, const std::string &db_session_id,
uint64_t file_number, uint64_t max_offset);
inline bool IsEmpty() const {
bool result = session_etc64_ == 0;
assert(!(offset_etc64_ > 0 && result));
return result;
}
// Construct a CacheKey for an offset within a file, which must be
// <= max_offset provided in constructor. An offset is not necessarily a
// byte offset if a smaller unique identifier of keyable offsets is used.
//
// This class was designed to make this hot code extremely fast.
inline CacheKey WithOffset(uint64_t offset) const {
assert(!IsEmpty());
assert(offset <= max_offset_);
return CacheKey(session_etc64_, offset_etc64_ ^ offset);
}
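// Illustrative usage (a sketch; caller names assumed): one base key per
// file, then O(1) key derivation per block:
//   OffsetableCacheKey base(db_id, db_session_id, file_number,
//                           /*max_offset=*/file_size);
//   CacheKey key = base.WithOffset(block_handle.offset());
//   Cache::Handle* h = cache->Lookup(key.AsSlice());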
// The "common prefix" is a shared prefix for all the returned CacheKeys,
// that also happens to usually be the same among many files in the same DB,
// so is efficient and highly accurate (not perfectly) for DB-specific cache
// dump selection (but not file-specific).
static constexpr size_t kCommonPrefixSize = 8;
inline Slice CommonPrefixSlice() const {
static_assert(sizeof(session_etc64_) == kCommonPrefixSize,
"8 byte common prefix expected");
assert(!IsEmpty());
assert(&this->session_etc64_ == static_cast<const void *>(this));
return Slice(reinterpret_cast<const char *>(this), kCommonPrefixSize);
}
// For any max_offset <= this value, the same encoding scheme is guaranteed.
static constexpr uint64_t kMaxOffsetStandardEncoding = 0xffffffffffU;
private:
#ifndef NDEBUG
uint64_t max_offset_ = 0;
#endif
};
} // namespace ROCKSDB_NAMESPACE


@@ -1,183 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/cache_reservation_manager.h"
#include <cassert>
#include <cstddef>
#include <cstring>
#include <memory>
#include "cache/cache_entry_roles.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "table/block_based/reader_common.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
template <CacheEntryRole R>
CacheReservationManagerImpl<R>::CacheReservationHandle::CacheReservationHandle(
std::size_t incremental_memory_used,
std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr)
: incremental_memory_used_(incremental_memory_used) {
assert(cache_res_mgr);
cache_res_mgr_ = cache_res_mgr;
}
template <CacheEntryRole R>
CacheReservationManagerImpl<
R>::CacheReservationHandle::~CacheReservationHandle() {
Status s = cache_res_mgr_->ReleaseCacheReservation(incremental_memory_used_);
s.PermitUncheckedError();
}
template <CacheEntryRole R>
CacheReservationManagerImpl<R>::CacheReservationManagerImpl(
std::shared_ptr<Cache> cache, bool delayed_decrease)
: delayed_decrease_(delayed_decrease),
cache_allocated_size_(0),
memory_used_(0) {
assert(cache != nullptr);
cache_ = cache;
}
template <CacheEntryRole R>
CacheReservationManagerImpl<R>::~CacheReservationManagerImpl() {
for (auto* handle : dummy_handles_) {
cache_->Release(handle, true);
}
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::UpdateCacheReservation(
std::size_t new_mem_used) {
memory_used_ = new_mem_used;
std::size_t cur_cache_allocated_size =
cache_allocated_size_.load(std::memory_order_relaxed);
if (new_mem_used == cur_cache_allocated_size) {
return Status::OK();
} else if (new_mem_used > cur_cache_allocated_size) {
Status s = IncreaseCacheReservation(new_mem_used);
return s;
} else {
// In delayed decrease mode, we don't decrease cache reservation
// until the memory usage is less than 3/4 of what we reserve
// in the cache.
// We do this because
// (1) Dummy entry insertion is expensive in block cache
// (2) Delayed releasing previously inserted dummy entries can save such
// expensive dummy entry insertion on memory increase in the near future,
// which is likely to happen when the memory usage is greater than or equal
// to 3/4 of what we reserve
if (delayed_decrease_ && new_mem_used >= cur_cache_allocated_size / 4 * 3) {
return Status::OK();
} else {
Status s = DecreaseCacheReservation(new_mem_used);
return s;
}
}
}
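// A hypothetical trace of the logic above, with delayed_decrease == true
// and 256 KiB dummy entries (kSizeDummyEntry):
//   UpdateCacheReservation(1 MiB)    -> inserts 4 dummies (1 MiB reserved)
//   UpdateCacheReservation(800 KiB)  -> no-op, since 800 KiB >= 3/4 * 1 MiB
//   UpdateCacheReservation(500 KiB)  -> releases down to 2 dummies (512 KiB)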
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle>* handle) {
assert(handle);
Status s =
UpdateCacheReservation(GetTotalMemoryUsed() + incremental_memory_used);
(*handle).reset(new CacheReservationManagerImpl::CacheReservationHandle(
incremental_memory_used,
std::enable_shared_from_this<
CacheReservationManagerImpl<R>>::shared_from_this()));
return s;
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::ReleaseCacheReservation(
std::size_t incremental_memory_used) {
assert(GetTotalMemoryUsed() >= incremental_memory_used);
std::size_t updated_total_mem_used =
GetTotalMemoryUsed() - incremental_memory_used;
Status s = UpdateCacheReservation(updated_total_mem_used);
return s;
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::IncreaseCacheReservation(
std::size_t new_mem_used) {
Status return_status = Status::OK();
while (new_mem_used > cache_allocated_size_.load(std::memory_order_relaxed)) {
Cache::Handle* handle = nullptr;
return_status = cache_->Insert(GetNextCacheKey(), nullptr, kSizeDummyEntry,
GetNoopDeleterForRole<R>(), &handle);
if (return_status != Status::OK()) {
return return_status;
}
dummy_handles_.push_back(handle);
cache_allocated_size_ += kSizeDummyEntry;
}
return return_status;
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::DecreaseCacheReservation(
std::size_t new_mem_used) {
Status return_status = Status::OK();
// Decrease to the smallest multiple of kSizeDummyEntry that is greater than
// or equal to new_mem_used. We compare using addition instead of
// new_mem_used <=
// cache_allocated_size_.load(std::memory_order_relaxed) - kSizeDummyEntry
// to avoid size_t underflow when cache_allocated_size_ == 0.
while (new_mem_used + kSizeDummyEntry <=
cache_allocated_size_.load(std::memory_order_relaxed)) {
assert(!dummy_handles_.empty());
auto* handle = dummy_handles_.back();
cache_->Release(handle, true);
dummy_handles_.pop_back();
cache_allocated_size_ -= kSizeDummyEntry;
}
return return_status;
}
template <CacheEntryRole R>
std::size_t CacheReservationManagerImpl<R>::GetTotalReservedCacheSize() {
return cache_allocated_size_.load(std::memory_order_relaxed);
}
template <CacheEntryRole R>
std::size_t CacheReservationManagerImpl<R>::GetTotalMemoryUsed() {
return memory_used_;
}
template <CacheEntryRole R>
Slice CacheReservationManagerImpl<R>::GetNextCacheKey() {
// Calling this function will have the side-effect of changing the
// underlying cache_key_ that is shared among other keys generated from this
// fucntion. Therefore please make sure the previous keys are saved/copied
// before calling this function.
cache_key_ = CacheKey::CreateUniqueForCacheLifetime(cache_.get());
return cache_key_.AsSlice();
}
template <CacheEntryRole R>
Cache::DeleterFn CacheReservationManagerImpl<R>::TEST_GetNoopDeleterForRole() {
return GetNoopDeleterForRole<R>();
}
template class CacheReservationManagerImpl<
CacheEntryRole::kBlockBasedTableReader>;
template class CacheReservationManagerImpl<
CacheEntryRole::kCompressionDictionaryBuildingBuffer>;
template class CacheReservationManagerImpl<CacheEntryRole::kFilterConstruction>;
template class CacheReservationManagerImpl<CacheEntryRole::kMisc>;
template class CacheReservationManagerImpl<CacheEntryRole::kWriteBuffer>;
} // namespace ROCKSDB_NAMESPACE


@@ -1,288 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>
#include "cache/cache_entry_roles.h"
#include "cache/cache_key.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
// CacheReservationManager is an interface for reserving cache space to
// account for memory used elsewhere (e.g., memtables or filter construction)
class CacheReservationManager {
public:
// CacheReservationHandle is for managing the lifetime of a cache reservation
// for an incremental amount of memory used (i.e, incremental_memory_used)
class CacheReservationHandle {
public:
virtual ~CacheReservationHandle() {}
};
virtual ~CacheReservationManager() {}
virtual Status UpdateCacheReservation(std::size_t new_memory_used) = 0;
virtual Status MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
*handle) = 0;
virtual std::size_t GetTotalReservedCacheSize() = 0;
virtual std::size_t GetTotalMemoryUsed() = 0;
};
// CacheReservationManagerImpl implements interface CacheReservationManager
// for reserving cache space for the memory used by inserting/releasing dummy
// entries in the cache.
//
// This class is NOT thread-safe, except that GetTotalReservedCacheSize()
// can be called without external synchronization.
template <CacheEntryRole R>
class CacheReservationManagerImpl
: public CacheReservationManager,
public std::enable_shared_from_this<CacheReservationManagerImpl<R>> {
public:
class CacheReservationHandle
: public CacheReservationManager::CacheReservationHandle {
public:
CacheReservationHandle(
std::size_t incremental_memory_used,
std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr);
~CacheReservationHandle() override;
private:
std::size_t incremental_memory_used_;
std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr_;
};
// Construct a CacheReservationManagerImpl
// @param cache The cache where dummy entries are inserted and released for
// reserving cache space
// @param delayed_decrease If set true, then dummy entries won't be released
// immediately when memory usage decreases.
// Instead, it will be released when the memory usage
// decreases to 3/4 of what we have reserved so far.
// This is for saving some future dummy entry
// insertion when memory usage increases are likely to
// happen in the near future.
//
// REQUIRED: cache is not nullptr
explicit CacheReservationManagerImpl(std::shared_ptr<Cache> cache,
bool delayed_decrease = false);
// no copy constructor, copy assignment, move constructor, move assignment
CacheReservationManagerImpl(const CacheReservationManagerImpl &) = delete;
CacheReservationManagerImpl &operator=(const CacheReservationManagerImpl &) =
delete;
CacheReservationManagerImpl(CacheReservationManagerImpl &&) = delete;
CacheReservationManagerImpl &operator=(CacheReservationManagerImpl &&) =
delete;
~CacheReservationManagerImpl() override;
// One of the two ways of reserving/releasing cache space,
// see MakeCacheReservation() for the other.
//
// Use ONLY one of these two ways to prevent unexpected behavior.
//
// Insert and release dummy entries in the cache to
// match the size of total dummy entries with the least multiple of
// kSizeDummyEntry greater than or equal to new_mem_used
//
// Insert dummy entries if new_memory_used > cache_allocated_size_;
//
// Release dummy entries if new_memory_used < cache_allocated_size_
// (and new_memory_used < cache_allocated_size_ * 3/4
// when delayed_decrease is set true);
//
// Keep dummy entries the same if (1) new_memory_used == cache_allocated_size_
// or (2) new_memory_used is in the interval of
// [cache_allocated_size_ * 3/4, cache_allocated_size) when delayed_decrease
// is set true.
//
// @param new_memory_used The number of bytes used by new memory
// The most recent new_memory_used passed in will be returned
// in GetTotalMemoryUsed() even when the call returns a non-ok status.
//
// Since the class is NOT thread-safe, external synchronization on the
// order of calling UpdateCacheReservation() is needed if you want
// GetTotalMemoryUsed() to return the latest memory used.
//
// @return On inserting dummy entries, it returns Status::OK() if all dummy
// entry insertions succeed.
// Otherwise, it returns the first non-ok status;
// On releasing dummy entries, it always returns Status::OK().
// On keeping dummy entries the same, it always returns Status::OK().
Status UpdateCacheReservation(std::size_t new_memory_used) override;
// One of the two ways of reserving cache space and releasing is done through
// destruction of CacheReservationHandle.
// See UpdateCacheReservation() for the other way.
//
// Use ONLY one of these two ways to prevent unexpected behavior.
//
// Insert dummy entries in the cache for the incremental memory usage
// to match the size of total dummy entries with the least multiple of
// kSizeDummyEntry greater than or equal to the total memory used.
//
// A CacheReservationHandle is returned as an output parameter.
// The reserved dummy entries are automatically released on the destruction of
// this handle, which achieves better RAII per cache reservation.
//
// WARNING: Deallocate all the handles of the CacheReservationManager object
// before deallocating the object to prevent unexpected behavior.
//
// @param incremental_memory_used The number of bytes increased in memory
// usage.
//
// Calling GetTotalMemoryUsed() afterward will return the total memory
// increased by this number, even when calling MakeCacheReservation()
// returns non-ok status.
//
// Since the class is NOT thread-safe, external synchronization in
// calling MakeCacheReservation() is needed if you want
// GetTotalMemoryUsed() to return the latest memory used.
//
// @param handle A pointer to std::unique_ptr<CacheReservationHandle> that
// manages the lifetime of the cache reservation represented by the
// handle.
//
// @return It returns Status::OK() if all dummy
// entry insertions succeed.
// Otherwise, it returns the first non-ok status;
//
// REQUIRES: handle != nullptr
Status MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
override;
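// Sketch of the RAII pattern this enables (hypothetical caller code):
//   std::unique_ptr<CacheReservationManager::CacheReservationHandle> h;
//   Status s = mgr->MakeCacheReservation(bytes_to_account_for, &h);
//   if (s.ok()) {
//     // ... use the memory; the reservation is released when h is
//     // destroyed ...
//   }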
// Return the size of the cache (which is a multiple of kSizeDummyEntry)
// successfully reserved by calling UpdateCacheReservation().
//
// When UpdateCacheReservation() returns a non-ok status, a subsequent call
// to GetTotalReservedCacheSize() might return a slightly smaller number
// than the actual reserved cache size, because the returned number is
// always a multiple of kSizeDummyEntry and the cache might become full in
// the middle of inserting a dummy entry.
std::size_t GetTotalReservedCacheSize() override;
// Return the latest total memory used indicated by the most recent call of
// UpdateCacheReservation(std::size_t new_memory_used);
std::size_t GetTotalMemoryUsed() override;
static constexpr std::size_t GetDummyEntrySize() { return kSizeDummyEntry; }
// For testing only - it is to help ensure the NoopDeleterForRole<R>
// accessed from CacheReservationManagerImpl and the one accessed from the
// test are from the same translation units
static Cache::DeleterFn TEST_GetNoopDeleterForRole();
private:
static constexpr std::size_t kSizeDummyEntry = 256 * 1024;
Slice GetNextCacheKey();
Status ReleaseCacheReservation(std::size_t incremental_memory_used);
Status IncreaseCacheReservation(std::size_t new_mem_used);
Status DecreaseCacheReservation(std::size_t new_mem_used);
std::shared_ptr<Cache> cache_;
bool delayed_decrease_;
std::atomic<std::size_t> cache_allocated_size_;
std::size_t memory_used_;
std::vector<Cache::Handle *> dummy_handles_;
CacheKey cache_key_;
};
class ConcurrentCacheReservationManager
: public CacheReservationManager,
public std::enable_shared_from_this<ConcurrentCacheReservationManager> {
public:
class CacheReservationHandle
: public CacheReservationManager::CacheReservationHandle {
public:
CacheReservationHandle(
std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr,
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
cache_res_handle) {
assert(cache_res_mgr && cache_res_handle);
cache_res_mgr_ = cache_res_mgr;
cache_res_handle_ = std::move(cache_res_handle);
}
~CacheReservationHandle() override {
std::lock_guard<std::mutex> lock(cache_res_mgr_->cache_res_mgr_mu_);
cache_res_handle_.reset();
}
private:
std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr_;
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
cache_res_handle_;
};
explicit ConcurrentCacheReservationManager(
std::shared_ptr<CacheReservationManager> cache_res_mgr) {
cache_res_mgr_ = std::move(cache_res_mgr);
}
ConcurrentCacheReservationManager(const ConcurrentCacheReservationManager &) =
delete;
ConcurrentCacheReservationManager &operator=(
const ConcurrentCacheReservationManager &) = delete;
ConcurrentCacheReservationManager(ConcurrentCacheReservationManager &&) =
delete;
ConcurrentCacheReservationManager &operator=(
ConcurrentCacheReservationManager &&) = delete;
~ConcurrentCacheReservationManager() override {}
inline Status UpdateCacheReservation(std::size_t new_memory_used) override {
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
return cache_res_mgr_->UpdateCacheReservation(new_memory_used);
}
inline Status MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
override {
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
wrapped_handle;
Status s;
{
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
s = cache_res_mgr_->MakeCacheReservation(incremental_memory_used,
&wrapped_handle);
}
(*handle).reset(
new ConcurrentCacheReservationManager::CacheReservationHandle(
std::enable_shared_from_this<
ConcurrentCacheReservationManager>::shared_from_this(),
std::move(wrapped_handle)));
return s;
}
inline std::size_t GetTotalReservedCacheSize() override {
return cache_res_mgr_->GetTotalReservedCacheSize();
}
inline std::size_t GetTotalMemoryUsed() override {
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
return cache_res_mgr_->GetTotalMemoryUsed();
}
private:
std::mutex cache_res_mgr_mu_;
std::shared_ptr<CacheReservationManager> cache_res_mgr_;
};
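// Hypothetical construction sketch: wrap a single-threaded manager so that
// multiple threads can share it safely:
//   auto shared_mgr = std::make_shared<ConcurrentCacheReservationManager>(
//       std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
//           cache));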
} // namespace ROCKSDB_NAMESPACE


@@ -1,468 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/cache_reservation_manager.h"
#include <cstddef>
#include <cstring>
#include <memory>
#include "cache/cache_entry_roles.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "test_util/testharness.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
class CacheReservationManagerTest : public ::testing::Test {
protected:
static constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
static constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
static constexpr int kNumShardBits = 0; // 2^0 shard
static constexpr std::size_t kMetaDataChargeOverhead = 10000;
std::shared_ptr<Cache> cache = NewLRUCache(kCacheCapacity, kNumShardBits);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng;
CacheReservationManagerTest() {
test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
}
};
TEST_F(CacheReservationManagerTest, GenerateCacheKey) {
std::size_t new_mem_used = 1 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead);
// Next unique Cache key
CacheKey ckey = CacheKey::CreateUniqueForCacheLifetime(cache.get());
// Get to the underlying values
using PairU64 = std::array<uint64_t, 2>;
auto& ckey_pair = *reinterpret_cast<PairU64*>(&ckey);
// Back it up to the one used by CRM (using CacheKey implementation details)
ckey_pair[1]--;
// Specific key (subject to implementation details)
EXPECT_EQ(ckey_pair, PairU64({0, 2}));
Cache::Handle* handle = cache->Lookup(ckey.AsSlice());
EXPECT_NE(handle, nullptr)
<< "Failed to generate the cache key for the dummy entry correctly";
// Clean up the returned handle from Lookup() to prevent memory leak
cache->Release(handle);
}
TEST_F(CacheReservationManagerTest, KeepCacheReservationTheSame) {
std::size_t new_mem_used = 1 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
std::size_t initial_pinned_usage = cache->GetPinnedUsage();
ASSERT_GE(initial_pinned_usage, 1 * kSizeDummyEntry);
ASSERT_LT(initial_pinned_usage,
1 * kSizeDummyEntry + kMetaDataChargeOverhead);
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to keep cache reservation the same when new_mem_used equals "
"to current cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when new_mem_used equals to current "
"cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly when new_mem_used "
"equals to current cache reservation";
EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
<< "Failed to keep underlying dummy entries the same when new_mem_used "
"equals to current cache reservation";
}
TEST_F(CacheReservationManagerTest,
IncreaseCacheReservationByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to increase cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation increase correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry)
<< "Failed to increase underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to increase underlying dummy entries in cache correctly";
}
TEST_F(CacheReservationManagerTest,
IncreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry + kSizeDummyEntry / 2;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to increase cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
3 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation increase correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 3 * kSizeDummyEntry)
<< "Failed to increase underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
3 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to increase underlying dummy entries in cache correctly";
}
TEST(CacheReservationManagerIncreaseReservationOnFullCacheTest,
     IncreaseCacheReservationOnFullCache) {
constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
constexpr std::size_t kSmallCacheCapacity = 4 * kSizeDummyEntry;
constexpr std::size_t kBigCacheCapacity = 4096 * kSizeDummyEntry;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kSmallCacheCapacity;
lo.num_shard_bits = 0; // 2^0 shard
lo.strict_capacity_limit = true;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
std::size_t new_mem_used = kSmallCacheCapacity + 1;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::Incomplete())
<< "Failed to return status to indicate failure of dummy entry insertion "
"during cache reservation on full cache";
EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep correctly before cache resevation failure happens "
"due to full cache";
EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(),
kSmallCacheCapacity)
<< "Failed to bookkeep correctly (i.e, bookkeep only successful dummy "
"entry insertions) when encountering cache resevation failure due to "
"full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
new_mem_used = kSmallCacheCapacity / 2; // 2 dummy entries
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation after encountering cache "
"reservation failure due to full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation decrease correctly after "
"encountering cache reservation due to full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry)
<< "Failed to release underlying dummy entries correctly on cache "
"reservation decrease after encountering cache resevation failure due "
"to full cache";
EXPECT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to release underlying dummy entries correctly on cache "
"reservation decrease after encountering cache resevation failure due "
"to full cache";
// Create cache full again for subsequent tests
new_mem_used = kSmallCacheCapacity + 1;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::Incomplete())
<< "Failed to return status to indicate failure of dummy entry insertion "
"during cache reservation on full cache";
EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep correctly before cache resevation failure happens "
"due to full cache";
EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(),
kSmallCacheCapacity)
<< "Failed to bookkeep correctly (i.e, bookkeep only successful dummy "
"entry insertions) when encountering cache resevation failure due to "
"full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
// Increase cache capacity so the previously failed insertion can fully
// succeed
cache->SetCapacity(kBigCacheCapacity);
new_mem_used = kSmallCacheCapacity + 1;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to increase cache reservation after increasing cache capacity "
"and mitigating cache full error";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
5 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation increase correctly after "
"increasing cache capacity and mitigating cache full error";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 5 * kSizeDummyEntry)
<< "Failed to insert underlying dummy entries correctly after increasing "
"cache capacity and mitigating cache full error";
EXPECT_LT(cache->GetPinnedUsage(),
5 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to insert underlying dummy entries correctly after increasing "
"cache capacity and mitigating cache full error";
}
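// Note on the arithmetic in the tests here: UpdateCacheReservation() reserves
// memory in units of fixed-size dummy entries, rounding the requested
// new_mem_used up to the next multiple of GetDummyEntrySize(). That is why
// kSmallCacheCapacity / 2 above maps to exactly 2 dummy entries, and why
// kSmallCacheCapacity + 1 maps to 5 entries once the capacity is raised.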
TEST_F(CacheReservationManagerTest,
DecreaseCacheReservationByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead);
new_mem_used = 1 * kSizeDummyEntry;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation decrease correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to decrease underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to decrease underlying dummy entries in cache correctly";
}
TEST_F(CacheReservationManagerTest,
DecreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead);
new_mem_used = kSizeDummyEntry / 2;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation decrease correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to decrease underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to decrease underlying dummy entries in cache correctly";
}
TEST(CacheReservationManagerWithDelayedDecreaseTest,
DecreaseCacheReservationWithDelayedDecrease) {
constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kCacheCapacity;
lo.num_shard_bits = 0;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache, true /* delayed_decrease */);
std::size_t new_mem_used = 8 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
8 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
std::size_t initial_pinned_usage = cache->GetPinnedUsage();
ASSERT_GE(initial_pinned_usage, 8 * kSizeDummyEntry);
ASSERT_LT(initial_pinned_usage,
8 * kSizeDummyEntry + kMetaDataChargeOverhead);
new_mem_used = 6 * kSizeDummyEntry;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
8 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when delaying cache reservation "
"decrease";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
<< "Failed to delay decreasing underlying dummy entries in cache";
new_mem_used = 7 * kSizeDummyEntry;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
8 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when delaying cache reservation "
"decrease";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
<< "Failed to delay decreasing underlying dummy entries in cache";
new_mem_used = 6 * kSizeDummyEntry - 1;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation correctly when new_mem_used < "
"GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
6 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when new_mem_used < "
"GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 6 * kSizeDummyEntry)
<< "Failed to decrease underlying dummy entries in cache when "
"new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
"decrease mode";
EXPECT_LT(cache->GetPinnedUsage(),
6 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to decrease underlying dummy entries in cache when "
"new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
"decrease mode";
}
TEST(CacheReservationManagerDestructorTest,
ReleaseRemainingDummyEntriesOnDestruction) {
constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kCacheCapacity;
lo.num_shard_bits = 0;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
{
std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
std::size_t new_mem_used = 1 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead);
}
EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry)
<< "Failed to release remaining underlying dummy entries in cache in "
"CacheReservationManager's destructor";
}
TEST(CacheReservationHandleTest, HandleTest) {
constexpr std::size_t kOneGigabyte = 1024 * 1024 * 1024;
constexpr std::size_t kSizeDummyEntry = 256 * 1024;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kOneGigabyte;
lo.num_shard_bits = 0;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng(
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache));
std::size_t mem_used = 0;
const std::size_t incremental_mem_used_handle_1 = 1 * kSizeDummyEntry;
const std::size_t incremental_mem_used_handle_2 = 2 * kSizeDummyEntry;
std::unique_ptr<CacheReservationManager::CacheReservationHandle> handle_1,
handle_2;
// To test that consecutive CacheReservationManager::MakeCacheReservation
// calls work correctly in terms of returning the handle as well as updating
// the cache reservation and the latest total memory used
Status s = test_cache_rev_mng->MakeCacheReservation(
incremental_mem_used_handle_1, &handle_1);
mem_used = mem_used + incremental_mem_used_handle_1;
ASSERT_EQ(s, Status::OK());
EXPECT_TRUE(handle_1 != nullptr);
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
s = test_cache_rev_mng->MakeCacheReservation(incremental_mem_used_handle_2,
&handle_2);
mem_used = mem_used + incremental_mem_used_handle_2;
ASSERT_EQ(s, Status::OK());
EXPECT_TRUE(handle_2 != nullptr);
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
// To test that
// CacheReservationManager::CacheReservationHandle::~CacheReservationHandle()
// works correctly in releasing the cache reserved for the handle
handle_1.reset();
EXPECT_TRUE(handle_1 == nullptr);
mem_used = mem_used - incremental_mem_used_handle_1;
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
// To test that the actual CacheReservationManager object won't be
// deallocated as long as there remain handles pointing to it.
// We strongly recommend deallocating the CacheReservationManager object only
// after all its handles are deallocated, to keep things easy to reason about
test_cache_rev_mng.reset();
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
handle_2.reset();
// The CacheReservationManager object is now deallocated since all the
// handles and its original pointer are gone
mem_used = mem_used - incremental_mem_used_handle_2;
EXPECT_EQ(mem_used, 0);
EXPECT_EQ(cache->GetPinnedUsage(), mem_used);
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
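For readers skimming the tests above, here is a minimal sketch of the RAII reservation pattern that HandleTest exercises. It uses only APIs that appear in this test file; the header path, cache capacity, and reservation size are illustrative assumptions:

// Illustrative sketch, not part of the RocksDB sources.
#include <cassert>
#include <memory>

#include "cache/cache_reservation_manager.h"  // assumed header location
#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  LRUCacheOptions lo;
  lo.capacity = 1024 * 1024;  // 1 MiB, arbitrary for this sketch
  lo.num_shard_bits = 0;
  std::shared_ptr<Cache> cache = NewLRUCache(lo);

  auto mngr =
      std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
          cache);

  // Reserve memory through an RAII handle; the reservation (charged in
  // multiples of the dummy entry size) is released when the handle dies.
  std::unique_ptr<CacheReservationManager::CacheReservationHandle> handle;
  Status s = mngr->MakeCacheReservation(64 * 1024, &handle);
  assert(s.ok());
  assert(cache->GetPinnedUsage() > 0);  // dummy entries are pinned

  handle.reset();  // releases the reservation made above
  return 0;
}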

cache/cache_test.cc

@ -14,9 +14,7 @@
#include <iostream>
#include <string>
#include <vector>
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "cache/lru_cache.h"
#include "test_util/testharness.h"
#include "util/coding.h"
@ -41,7 +39,6 @@ static int DecodeValue(void* v) {
const std::string kLRU = "lru";
const std::string kClock = "clock";
const std::string kFast = "fast";
void dumbDeleter(const Slice& /*key*/, void* /*value*/) {}
@ -86,9 +83,6 @@ class CacheTest : public testing::TestWithParam<std::string> {
if (type == kClock) {
return NewClockCache(capacity);
}
-if (type == kFast) {
-return NewFastLRUCache(capacity);
-}
return nullptr;
}
@ -109,10 +103,6 @@ class CacheTest : public testing::TestWithParam<std::string> {
return NewClockCache(capacity, num_shard_bits, strict_capacity_limit,
charge_policy);
}
-if (type == kFast) {
-return NewFastLRUCache(capacity, num_shard_bits, strict_capacity_limit,
-charge_policy);
-}
return nullptr;
}
@ -193,7 +183,7 @@ TEST_P(CacheTest, UsageTest) {
// make sure the cache will be overloaded
for (uint64_t i = 1; i < kCapacity; ++i) {
-auto key = std::to_string(i);
+auto key = ToString(i);
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
@ -265,7 +255,7 @@ TEST_P(CacheTest, PinnedUsageTest) {
// check that overloading the cache does not change the pinned usage
for (uint64_t i = 1; i < 2 * kCapacity; ++i) {
-auto key = std::to_string(i);
+auto key = ToString(i);
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
@ -585,7 +575,7 @@ TEST_P(CacheTest, SetCapacity) {
std::vector<Cache::Handle*> handles(10);
// Insert 5 entries, but not releasing.
for (size_t i = 0; i < 5; i++) {
-std::string key = std::to_string(i + 1);
+std::string key = ToString(i+1);
Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_TRUE(s.ok());
}
@ -600,7 +590,7 @@ TEST_P(CacheTest, SetCapacity) {
// then decrease capacity to 7, final capacity should be 7
// and usage should be 7
for (size_t i = 5; i < 10; i++) {
-std::string key = std::to_string(i + 1);
+std::string key = ToString(i+1);
Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_TRUE(s.ok());
}
@ -619,9 +609,6 @@ TEST_P(CacheTest, SetCapacity) {
for (size_t i = 5; i < 10; i++) {
cache->Release(handles[i]);
}
-// Make sure this doesn't crash or upset ASAN/valgrind
-cache->DisownData();
}
TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
@ -631,7 +618,7 @@ TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
std::vector<Cache::Handle*> handles(10);
Status s;
for (size_t i = 0; i < 10; i++) {
-std::string key = std::to_string(i + 1);
+std::string key = ToString(i + 1);
s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_OK(s);
ASSERT_NE(nullptr, handles[i]);
@ -655,7 +642,7 @@ TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
// test3: init with flag being true.
std::shared_ptr<Cache> cache2 = NewCache(5, 0, true);
for (size_t i = 0; i < 5; i++) {
-std::string key = std::to_string(i + 1);
+std::string key = ToString(i + 1);
s = cache2->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_OK(s);
ASSERT_NE(nullptr, handles[i]);
@ -685,14 +672,14 @@ TEST_P(CacheTest, OverCapacity) {
// Insert n+1 entries, but not releasing.
for (size_t i = 0; i < n + 1; i++) {
-std::string key = std::to_string(i + 1);
+std::string key = ToString(i+1);
Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_TRUE(s.ok());
}
// Guess what's in the cache now?
for (size_t i = 0; i < n + 1; i++) {
-std::string key = std::to_string(i + 1);
+std::string key = ToString(i+1);
auto h = cache->Lookup(key);
ASSERT_TRUE(h != nullptr);
if (h) cache->Release(h);
@ -713,7 +700,7 @@ TEST_P(CacheTest, OverCapacity) {
// This is consistent with the LRU policy since the element 0
// was released first
for (size_t i = 0; i < n + 1; i++) {
-std::string key = std::to_string(i + 1);
+std::string key = ToString(i+1);
auto h = cache->Lookup(key);
if (h) {
ASSERT_NE(i, 0U);
@ -754,9 +741,9 @@ TEST_P(CacheTest, ApplyToAllEntriesTest) {
std::vector<std::string> callback_state;
const auto callback = [&](const Slice& key, void* value, size_t charge,
Cache::DeleterFn deleter) {
callback_state.push_back(std::to_string(DecodeKey(key)) + "," +
std::to_string(DecodeValue(value)) + "," +
std::to_string(charge));
callback_state.push_back(ToString(DecodeKey(key)) + "," +
ToString(DecodeValue(value)) + "," +
ToString(charge));
assert(deleter == &CacheTest::Deleter);
};
@ -765,8 +752,8 @@ TEST_P(CacheTest, ApplyToAllEntriesTest) {
for (int i = 0; i < 10; ++i) {
Insert(i, i * 2, i + 1);
inserted.push_back(std::to_string(i) + "," + std::to_string(i * 2) + "," +
std::to_string(i + 1));
inserted.push_back(ToString(i) + "," + ToString(i * 2) + "," +
ToString(i + 1));
}
cache_->ApplyToAllEntries(callback, /*opts*/ {});
@ -848,13 +835,11 @@ TEST_P(CacheTest, GetChargeAndDeleter) {
std::shared_ptr<Cache> (*new_clock_cache_func)(
size_t, int, bool, CacheMetadataChargePolicy) = NewClockCache;
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
-testing::Values(kLRU, kClock, kFast));
+testing::Values(kLRU, kClock));
#else
-INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
-testing::Values(kLRU, kFast));
+INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, testing::Values(kLRU));
#endif // SUPPORT_CLOCK_CACHE
-INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest,
-testing::Values(kLRU, kFast));
+INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, testing::Values(kLRU));
} // namespace ROCKSDB_NAMESPACE
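Beyond dropping the kFast cache type, the diff above mostly swaps std::to_string and ToString; the insert/lookup/release cycle under test is unchanged. Here is a self-contained sketch of that cycle with an explicit deleter, using only Cache calls that appear in this test file (the key, value, and charge are illustrative):

// Illustrative sketch, not part of the RocksDB sources.
#include <cassert>
#include <memory>

#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

static void ValueDeleter(const Slice& /*key*/, void* value) {
  delete static_cast<int*>(value);
}

int main() {
  std::shared_ptr<Cache> cache = NewLRUCache(/*capacity=*/1024);

  // Insert returns a pinned handle when one is requested.
  Cache::Handle* handle = nullptr;
  Status s = cache->Insert("key", new int(42), /*charge=*/1, &ValueDeleter,
                           &handle);
  assert(s.ok() && handle != nullptr);
  cache->Release(handle);  // unpin; the entry goes back on the LRU list

  // Lookup returns nullptr on a miss and a pinned handle on a hit.
  handle = cache->Lookup("key");
  if (handle != nullptr) {
    assert(*static_cast<int*>(cache->Value(handle)) == 42);
    cache->Release(handle);
  }
  return 0;
}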

cache/clock_cache.cc

@ -33,11 +33,11 @@ std::shared_ptr<Cache> NewClockCache(
#ifndef ROCKSDB_USE_RTTI
#define TBB_USE_EXCEPTIONS 0
#endif
#include "tbb/concurrent_hash_map.h"
#include "cache/sharded_cache.h"
#include "port/lang.h"
#include "port/malloc.h"
#include "port/port.h"
#include "tbb/concurrent_hash_map.h"
#include "util/autovector.h"
#include "util/mutexlock.h"
@ -228,22 +228,22 @@ struct CacheHandle {
};
// Key of hash map. We store hash value with the key for convenience.
-struct ClockCacheKey {
+struct CacheKey {
Slice key;
uint32_t hash_value;
-ClockCacheKey() = default;
+CacheKey() = default;
-ClockCacheKey(const Slice& k, uint32_t h) {
+CacheKey(const Slice& k, uint32_t h) {
key = k;
hash_value = h;
}
-static bool equal(const ClockCacheKey& a, const ClockCacheKey& b) {
+static bool equal(const CacheKey& a, const CacheKey& b) {
return a.hash_value == b.hash_value && a.key == b.key;
}
-static size_t hash(const ClockCacheKey& a) {
+static size_t hash(const CacheKey& a) {
return static_cast<size_t>(a.hash_value);
}
};
@ -260,8 +260,7 @@ struct CleanupContext {
class ClockCacheShard final : public CacheShard {
public:
// Hash map type.
-using HashTable =
-tbb::concurrent_hash_map<ClockCacheKey, CacheHandle*, ClockCacheKey>;
+typedef tbb::concurrent_hash_map<CacheKey, CacheHandle*, CacheKey> HashTable;
ClockCacheShard();
~ClockCacheShard() override;
@ -286,8 +285,8 @@ class ClockCacheShard final : public CacheShard {
return Lookup(key, hash);
}
bool Release(Cache::Handle* handle, bool /*useful*/,
-bool erase_if_last_ref) override {
-return Release(handle, erase_if_last_ref);
+bool force_erase) override {
+return Release(handle, force_erase);
}
bool IsReady(Cache::Handle* /*handle*/) override { return true; }
void Wait(Cache::Handle* /*handle*/) override {}
@ -297,7 +296,7 @@ class ClockCacheShard final : public CacheShard {
//
// Not necessary to hold mutex_ before being called.
bool Ref(Cache::Handle* handle) override;
-bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
+bool Release(Cache::Handle* handle, bool force_erase = false) override;
void Erase(const Slice& key, uint32_t hash) override;
bool EraseAndConfirm(const Slice& key, uint32_t hash,
CleanupContext* context);
@ -560,7 +559,7 @@ bool ClockCacheShard::TryEvict(CacheHandle* handle, CleanupContext* context) {
if (handle->flags.compare_exchange_strong(flags, 0, std::memory_order_acquire,
std::memory_order_relaxed)) {
bool erased __attribute__((__unused__)) =
-table_.erase(ClockCacheKey(handle->key, handle->hash));
+table_.erase(CacheKey(handle->key, handle->hash));
assert(erased);
RecycleHandle(handle, context);
return true;
@ -657,13 +656,13 @@ CacheHandle* ClockCacheShard::Insert(
// relative updates (fetch_add, etc).
handle->flags.store(flags, std::memory_order_relaxed);
HashTable::accessor accessor;
-if (table_.find(accessor, ClockCacheKey(key, hash))) {
+if (table_.find(accessor, CacheKey(key, hash))) {
*overwritten = true;
CacheHandle* existing_handle = accessor->second;
table_.erase(accessor);
UnsetInCache(existing_handle, context);
}
-table_.insert(HashTable::value_type(ClockCacheKey(key, hash), handle));
+table_.insert(HashTable::value_type(CacheKey(key, hash), handle));
if (hold_reference) {
pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
}
@ -687,7 +686,7 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
Status s;
if (out_handle != nullptr) {
if (handle == nullptr) {
s = Status::Incomplete("Insert failed due to CLOCK cache being full.");
s = Status::Incomplete("Insert failed due to LRU cache being full.");
} else {
*out_handle = reinterpret_cast<Cache::Handle*>(handle);
}
@ -702,7 +701,7 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
HashTable::const_accessor accessor;
-if (!table_.find(accessor, ClockCacheKey(key, hash))) {
+if (!table_.find(accessor, CacheKey(key, hash))) {
return nullptr;
}
CacheHandle* handle = accessor->second;
@ -725,11 +724,11 @@ Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
return reinterpret_cast<Cache::Handle*>(handle);
}
-bool ClockCacheShard::Release(Cache::Handle* h, bool erase_if_last_ref) {
+bool ClockCacheShard::Release(Cache::Handle* h, bool force_erase) {
CleanupContext context;
CacheHandle* handle = reinterpret_cast<CacheHandle*>(h);
bool erased = Unref(handle, true, &context);
-if (erase_if_last_ref && !erased) {
+if (force_erase && !erased) {
erased = EraseAndConfirm(handle->key, handle->hash, &context);
}
Cleanup(context);
@ -747,7 +746,7 @@ bool ClockCacheShard::EraseAndConfirm(const Slice& key, uint32_t hash,
MutexLock l(&mutex_);
HashTable::accessor accessor;
bool erased = false;
-if (table_.find(accessor, ClockCacheKey(key, hash))) {
+if (table_.find(accessor, CacheKey(key, hash))) {
CacheHandle* handle = accessor->second;
table_.erase(accessor);
erased = UnsetInCache(handle, context);
@ -810,10 +809,9 @@ class ClockCache final : public ShardedCache {
}
void DisownData() override {
// Leak data only if that won't generate an ASAN/valgrind warning
-if (!kMustFreeHeapAllocations) {
+#ifndef MUST_FREE_HEAP_ALLOCATIONS
shards_ = nullptr;
-}
+#endif
}
void WaitAll(std::vector<Handle*>& /*handles*/) override {}
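A side note on the ClockCacheKey/CacheKey rename above: the struct doubles as the HashCompare parameter of tbb::concurrent_hash_map, which only needs hash() and equal() members callable on an instance (static members qualify). A minimal sketch of the same pattern, with an illustrative Key and mapped type standing in for the shard's types:

// Illustrative sketch, not part of the RocksDB sources.
#include <cstddef>
#include <cstdint>
#include <string>

#include "tbb/concurrent_hash_map.h"

struct Key {
  std::string key;
  uint32_t hash_value = 0;
  static bool equal(const Key& a, const Key& b) {
    return a.hash_value == b.hash_value && a.key == b.key;
  }
  static std::size_t hash(const Key& a) {
    return static_cast<std::size_t>(a.hash_value);
  }
};

// Key serves as its own HashCompare, as CacheKey does above.
using Table = tbb::concurrent_hash_map<Key, int, Key>;

int main() {
  Table table;
  table.insert(Table::value_type(Key{"a", 1}, 42));

  Table::const_accessor ca;
  if (table.find(ca, Key{"a", 1})) {
    int v = ca->second;  // read while the accessor holds the bucket lock
    (void)v;
  }
  ca.release();

  Table::accessor a;
  if (table.find(a, Key{"a", 1})) {
    table.erase(a);  // erase through a write accessor, as the shard code does
  }
  return 0;
}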

cache/compressed_secondary_cache.cc

@ -1,171 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/compressed_secondary_cache.h"
#include <memory>
#include "memory/memory_allocator.h"
#include "util/compression.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
namespace {
void DeletionCallback(const Slice& /*key*/, void* obj) {
delete reinterpret_cast<CacheAllocationPtr*>(obj);
obj = nullptr;
}
} // namespace
CompressedSecondaryCache::CompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version)
: cache_options_(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator, use_adaptive_mutex,
metadata_charge_policy, compression_type,
compress_format_version) {
cache_ = NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator,
use_adaptive_mutex, metadata_charge_policy);
}
CompressedSecondaryCache::~CompressedSecondaryCache() { cache_.reset(); }
std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
bool& is_in_sec_cache) {
std::unique_ptr<SecondaryCacheResultHandle> handle;
is_in_sec_cache = false;
Cache::Handle* lru_handle = cache_->Lookup(key);
if (lru_handle == nullptr) {
return handle;
}
CacheAllocationPtr* ptr =
reinterpret_cast<CacheAllocationPtr*>(cache_->Value(lru_handle));
void* value = nullptr;
size_t charge = 0;
Status s;
if (cache_options_.compression_type == kNoCompression) {
s = create_cb(ptr->get(), cache_->GetCharge(lru_handle), &value, &charge);
} else {
UncompressionContext uncompression_context(cache_options_.compression_type);
UncompressionInfo uncompression_info(uncompression_context,
UncompressionDict::GetEmptyDict(),
cache_options_.compression_type);
size_t uncompressed_size = 0;
CacheAllocationPtr uncompressed;
uncompressed = UncompressData(
uncompression_info, (char*)ptr->get(), cache_->GetCharge(lru_handle),
&uncompressed_size, cache_options_.compress_format_version,
cache_options_.memory_allocator.get());
if (!uncompressed) {
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
return handle;
}
s = create_cb(uncompressed.get(), uncompressed_size, &value, &charge);
}
if (!s.ok()) {
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
return handle;
}
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
handle.reset(new CompressedSecondaryCacheResultHandle(value, charge));
return handle;
}
Status CompressedSecondaryCache::Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) {
size_t size = (*helper->size_cb)(value);
CacheAllocationPtr ptr =
AllocateBlock(size, cache_options_.memory_allocator.get());
Status s = (*helper->saveto_cb)(value, 0, size, ptr.get());
if (!s.ok()) {
return s;
}
Slice val(ptr.get(), size);
std::string compressed_val;
if (cache_options_.compression_type != kNoCompression) {
CompressionOptions compression_opts;
CompressionContext compression_context(cache_options_.compression_type);
uint64_t sample_for_compression = 0;
CompressionInfo compression_info(
compression_opts, compression_context, CompressionDict::GetEmptyDict(),
cache_options_.compression_type, sample_for_compression);
bool success =
CompressData(val, compression_info,
cache_options_.compress_format_version, &compressed_val);
if (!success) {
return Status::Corruption("Error compressing value.");
}
val = Slice(compressed_val);
size = compressed_val.size();
ptr = AllocateBlock(size, cache_options_.memory_allocator.get());
memcpy(ptr.get(), compressed_val.data(), size);
}
CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr));
return cache_->Insert(key, buf, size, DeletionCallback);
}
void CompressedSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); }
std::string CompressedSecondaryCache::GetPrintableOptions() const {
std::string ret;
ret.reserve(20000);
const int kBufferSize = 200;
char buffer[kBufferSize];
ret.append(cache_->GetPrintableOptions());
snprintf(buffer, kBufferSize, " compression_type : %s\n",
CompressionTypeToString(cache_options_.compression_type).c_str());
ret.append(buffer);
snprintf(buffer, kBufferSize, " compression_type : %d\n",
cache_options_.compress_format_version);
ret.append(buffer);
return ret;
}
std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version) {
return std::make_shared<CompressedSecondaryCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
memory_allocator, use_adaptive_mutex, metadata_charge_policy,
compression_type, compress_format_version);
}
std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
const CompressedSecondaryCacheOptions& opts) {
// The secondary_cache is disabled for this LRUCache instance.
assert(opts.secondary_cache == nullptr);
return NewCompressedSecondaryCache(
opts.capacity, opts.num_shard_bits, opts.strict_capacity_limit,
opts.high_pri_pool_ratio, opts.memory_allocator, opts.use_adaptive_mutex,
opts.metadata_charge_policy, opts.compression_type,
opts.compress_format_version);
}
} // namespace ROCKSDB_NAMESPACE
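Insert() above serializes the caller's object through the Cache::CacheItemHelper callbacks: size_cb reports the byte count, then saveto_cb copies the bytes into a freshly allocated block before optional compression. A minimal sketch of such a helper for a plain std::string payload, mirroring the callback trio defined in the test file further below (the payload type and header locations are illustrative assumptions):

// Illustrative sketch, not part of the RocksDB sources.
#include <cassert>
#include <cstring>
#include <memory>
#include <string>

#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

// Reports how many bytes SaveToCb will write.
static size_t SizeCb(void* obj) {
  return static_cast<std::string*>(obj)->size();
}

// Copies [from_offset, from_offset + length) of the serialized form into out.
static Status SaveToCb(void* from_obj, size_t from_offset, size_t length,
                       void* out) {
  const std::string* s = static_cast<std::string*>(from_obj);
  std::memcpy(out, s->data() + from_offset, length);
  return Status::OK();
}

// Destroys the object once the cache is done with it.
static void DeleteCb(const Slice& /*key*/, void* obj) {
  delete static_cast<std::string*>(obj);
}

static Cache::CacheItemHelper kStringHelper(SizeCb, SaveToCb, DeleteCb);

int main() {
  std::shared_ptr<Cache> cache = NewLRUCache(/*capacity=*/4096);
  auto* payload = new std::string("hello");
  // The helper lets this entry be demoted to (and re-created from) a
  // SecondaryCache such as the one implemented above.
  Status s = cache->Insert("k", payload, &kStringHelper, payload->size());
  assert(s.ok());
  return 0;
}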

cache/compressed_secondary_cache.h

@ -1,86 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <memory>
#include "cache/lru_cache.h"
#include "memory/memory_allocator.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/compression.h"
namespace ROCKSDB_NAMESPACE {
class CompressedSecondaryCacheResultHandle : public SecondaryCacheResultHandle {
public:
CompressedSecondaryCacheResultHandle(void* value, size_t size)
: value_(value), size_(size) {}
virtual ~CompressedSecondaryCacheResultHandle() override = default;
CompressedSecondaryCacheResultHandle(
const CompressedSecondaryCacheResultHandle&) = delete;
CompressedSecondaryCacheResultHandle& operator=(
const CompressedSecondaryCacheResultHandle&) = delete;
bool IsReady() override { return true; }
void Wait() override {}
void* Value() override { return value_; }
size_t Size() override { return size_; }
private:
void* value_;
size_t size_;
};
// The CompressedSecondaryCache is a concrete implementation of
// rocksdb::SecondaryCache.
//
// Users can also cast a pointer to it and call methods on
// it directly, especially custom methods that may be added
// in the future. For example -
// std::unique_ptr<rocksdb::SecondaryCache> cache =
// NewCompressedSecondaryCache(opts);
// static_cast<CompressedSecondaryCache*>(cache.get())->Erase(key);
class CompressedSecondaryCache : public SecondaryCache {
public:
CompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata,
CompressionType compression_type = CompressionType::kLZ4Compression,
uint32_t compress_format_version = 2);
virtual ~CompressedSecondaryCache() override;
const char* Name() const override { return "CompressedSecondaryCache"; }
Status Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) override;
std::unique_ptr<SecondaryCacheResultHandle> Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
bool& is_in_sec_cache) override;
void Erase(const Slice& key) override;
void WaitAll(std::vector<SecondaryCacheResultHandle*> /*handles*/) override {}
std::string GetPrintableOptions() const override;
private:
std::shared_ptr<Cache> cache_;
CompressedSecondaryCacheOptions cache_options_;
};
} // namespace ROCKSDB_NAMESPACE
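Typical wiring for the class declared above, as exercised by the integration tests that follow: build the compressed secondary cache first, then attach it to the primary block cache through LRUCacheOptions::secondary_cache. A minimal sketch with illustrative capacities; the header locations for the factory are assumed:

// Illustrative sketch, not part of the RocksDB sources.
#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/secondary_cache.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  CompressedSecondaryCacheOptions sec_opts;
  sec_opts.capacity = 2048;
  sec_opts.num_shard_bits = 0;
  sec_opts.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<SecondaryCache> sec_cache =
      NewCompressedSecondaryCache(sec_opts);

  LRUCacheOptions lru_opts(1024, 0, false, 0.5, nullptr,
                           kDefaultToAdaptiveMutex, kDontChargeCacheMetadata);
  lru_opts.secondary_cache = sec_cache;  // evictions demote into sec_cache
  std::shared_ptr<Cache> cache = NewLRUCache(lru_opts);
  (void)cache;
  return 0;
}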

cache/compressed_secondary_cache_test.cc

@ -1,607 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/compressed_secondary_cache.h"
#include <algorithm>
#include <cstdint>
#include "memory/jemalloc_nodump_allocator.h"
#include "memory/memory_allocator.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/compression.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
class CompressedSecondaryCacheTest : public testing::Test {
public:
CompressedSecondaryCacheTest() : fail_create_(false) {}
~CompressedSecondaryCacheTest() {}
protected:
class TestItem {
public:
TestItem(const char* buf, size_t size) : buf_(new char[size]), size_(size) {
memcpy(buf_.get(), buf, size);
}
~TestItem() {}
char* Buf() { return buf_.get(); }
size_t Size() { return size_; }
private:
std::unique_ptr<char[]> buf_;
size_t size_;
};
static size_t SizeCallback(void* obj) {
return reinterpret_cast<TestItem*>(obj)->Size();
}
static Status SaveToCallback(void* from_obj, size_t from_offset,
size_t length, void* out) {
TestItem* item = reinterpret_cast<TestItem*>(from_obj);
const char* buf = item->Buf();
EXPECT_EQ(length, item->Size());
EXPECT_EQ(from_offset, 0);
memcpy(out, buf, length);
return Status::OK();
}
static void DeletionCallback(const Slice& /*key*/, void* obj) {
delete reinterpret_cast<TestItem*>(obj);
obj = nullptr;
}
static Cache::CacheItemHelper helper_;
static Status SaveToCallbackFail(void* /*obj*/, size_t /*offset*/,
size_t /*size*/, void* /*out*/) {
return Status::NotSupported();
}
static Cache::CacheItemHelper helper_fail_;
Cache::CreateCallback test_item_creator = [&](const void* buf, size_t size,
void** out_obj,
size_t* charge) -> Status {
if (fail_create_) {
return Status::NotSupported();
}
*out_obj = reinterpret_cast<void*>(new TestItem((char*)buf, size));
*charge = size;
return Status::OK();
};
void SetFailCreate(bool fail) { fail_create_ = fail; }
void BasicTest(bool sec_cache_is_compressed, bool use_jemalloc) {
CompressedSecondaryCacheOptions opts;
opts.capacity = 2048;
opts.num_shard_bits = 0;
opts.metadata_charge_policy = kDontChargeCacheMetadata;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
opts.compression_type = CompressionType::kNoCompression;
}
} else {
opts.compression_type = CompressionType::kNoCompression;
}
if (use_jemalloc) {
JemallocAllocatorOptions jopts;
std::shared_ptr<MemoryAllocator> allocator;
std::string msg;
if (JemallocNodumpAllocator::IsSupported(&msg)) {
Status s = NewJemallocNodumpAllocator(jopts, &allocator);
if (s.ok()) {
opts.memory_allocator = allocator;
}
} else {
ROCKSDB_GTEST_BYPASS("JEMALLOC not supported");
}
}
std::shared_ptr<SecondaryCache> sec_cache =
NewCompressedSecondaryCache(opts);
bool is_in_sec_cache{true};
// Lookup a non-existent key.
std::unique_ptr<SecondaryCacheResultHandle> handle0 =
sec_cache->Lookup("k0", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle0, nullptr);
Random rnd(301);
// Insert and Lookup the first item.
std::string str1;
test::CompressibleString(&rnd, 0.25, 1000, &str1);
TestItem item1(str1.data(), str1.length());
ASSERT_OK(sec_cache->Insert("k1", &item1,
&CompressedSecondaryCacheTest::helper_));
std::unique_ptr<SecondaryCacheResultHandle> handle1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle1, nullptr);
ASSERT_FALSE(is_in_sec_cache);
std::unique_ptr<TestItem> val1 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle1->Value()));
ASSERT_NE(val1, nullptr);
ASSERT_EQ(memcmp(val1->Buf(), item1.Buf(), item1.Size()), 0);
// Lookup the first item again.
std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle1_1, nullptr);
// Insert and Lookup the second item.
std::string str2;
test::CompressibleString(&rnd, 0.5, 1000, &str2);
TestItem item2(str2.data(), str2.length());
ASSERT_OK(sec_cache->Insert("k2", &item2,
&CompressedSecondaryCacheTest::helper_));
std::unique_ptr<SecondaryCacheResultHandle> handle2 =
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle2, nullptr);
std::unique_ptr<TestItem> val2 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2->Value()));
ASSERT_NE(val2, nullptr);
ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0);
std::vector<SecondaryCacheResultHandle*> handles = {handle1.get(),
handle2.get()};
sec_cache->WaitAll(handles);
sec_cache.reset();
}
void FailsTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 1100;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> sec_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
// Insert and Lookup the first item.
Random rnd(301);
std::string str1(rnd.RandomString(1000));
TestItem item1(str1.data(), str1.length());
ASSERT_OK(sec_cache->Insert("k1", &item1,
&CompressedSecondaryCacheTest::helper_));
// Insert and Lookup the second item.
std::string str2(rnd.RandomString(200));
TestItem item2(str2.data(), str2.length());
// k1 is evicted.
ASSERT_OK(sec_cache->Insert("k2", &item2,
&CompressedSecondaryCacheTest::helper_));
bool is_in_sec_cache{false};
std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle1_1, nullptr);
std::unique_ptr<SecondaryCacheResultHandle> handle2 =
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle2, nullptr);
std::unique_ptr<TestItem> val2 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2->Value()));
ASSERT_NE(val2, nullptr);
ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0);
// Create Fails.
SetFailCreate(true);
std::unique_ptr<SecondaryCacheResultHandle> handle2_1 =
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle2_1, nullptr);
// Save Fails.
std::string str3 = rnd.RandomString(10);
TestItem item3(str3.data(), str3.length());
ASSERT_NOK(sec_cache->Insert("k3", &item3,
&CompressedSecondaryCacheTest::helper_fail_));
sec_cache.reset();
}
void BasicIntegrationTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2300;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions lru_cache_opts(1024, 0, false, 0.5, nullptr,
kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
lru_cache_opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(lru_cache_opts);
std::shared_ptr<Statistics> stats = CreateDBStatistics();
Random rnd(301);
std::string str1 = rnd.RandomString(1010);
std::string str1_clone{str1};
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// After Insert, lru cache contains k2 and secondary cache contains k1.
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
std::string str3 = rnd.RandomString(1020);
TestItem* item3 = new TestItem(str3.data(), str3.length());
// After Insert, lru cache contains k3 and secondary cache contains k1 and
// k2
ASSERT_OK(cache->Insert("k3", item3, &CompressedSecondaryCacheTest::helper_,
str3.length()));
Cache::Handle* handle;
handle = cache->Lookup("k3", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
TestItem* val3 = static_cast<TestItem*>(cache->Value(handle));
ASSERT_NE(val3, nullptr);
ASSERT_EQ(memcmp(val3->Buf(), item3->Buf(), item3->Size()), 0);
cache->Release(handle);
// Lookup a non-existent key.
handle = cache->Lookup("k0", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_EQ(handle, nullptr);
// This Lookup should promote k1 and erase k1 from the secondary cache,
// then k3 is demoted. So k2 and k3 are in the secondary cache.
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
TestItem* val1_1 = static_cast<TestItem*>(cache->Value(handle));
ASSERT_NE(val1_1, nullptr);
ASSERT_EQ(memcmp(val1_1->Buf(), str1_clone.data(), str1_clone.size()), 0);
cache->Release(handle);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void BasicIntegrationFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
auto item1 =
std::unique_ptr<TestItem>(new TestItem(str1.data(), str1.length()));
ASSERT_NOK(cache->Insert("k1", item1.get(), nullptr, str1.length()));
ASSERT_OK(cache->Insert("k1", item1.get(),
&CompressedSecondaryCacheTest::helper_,
str1.length()));
item1.release(); // Appease clang-analyze "potential memory leak"
Cache::Handle* handle;
handle = cache->Lookup("k2", nullptr, test_item_creator,
Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, false);
ASSERT_EQ(handle, nullptr);
cache.reset();
secondary_cache.reset();
}
void IntegrationSaveFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1,
&CompressedSecondaryCacheTest::helper_fail_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2,
&CompressedSecondaryCacheTest::helper_fail_,
str2.length()));
Cache::Handle* handle;
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 demotion would have failed
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void IntegrationCreateFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle;
SetFailCreate(true);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 creation would have failed
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void IntegrationFullCapacityTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, /*_strict_capacity_limit=*/true, 0.5, nullptr,
kDefaultToAdaptiveMutex, kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle2;
handle2 = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle2, nullptr);
cache->Release(handle2);
// k1 promotion should fail due to the block cache being at capacity,
// but the lookup should still succeed
Cache::Handle* handle1;
handle1 = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle1, nullptr);
cache->Release(handle1);
// Since k1 didn't get inserted, k2 should still be in cache
handle2 = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle2, nullptr);
cache->Release(handle2);
cache.reset();
secondary_cache.reset();
}
private:
bool fail_create_;
};
Cache::CacheItemHelper CompressedSecondaryCacheTest::helper_(
CompressedSecondaryCacheTest::SizeCallback,
CompressedSecondaryCacheTest::SaveToCallback,
CompressedSecondaryCacheTest::DeletionCallback);
Cache::CacheItemHelper CompressedSecondaryCacheTest::helper_fail_(
CompressedSecondaryCacheTest::SizeCallback,
CompressedSecondaryCacheTest::SaveToCallbackFail,
CompressedSecondaryCacheTest::DeletionCallback);
TEST_F(CompressedSecondaryCacheTest, BasicTestWithNoCompression) {
BasicTest(false, false);
}
TEST_F(CompressedSecondaryCacheTest,
BasicTestWithMemoryAllocatorAndNoCompression) {
BasicTest(false, true);
}
TEST_F(CompressedSecondaryCacheTest, BasicTestWithCompression) {
BasicTest(true, false);
}
TEST_F(CompressedSecondaryCacheTest,
BasicTestWithMemoryAllocatorAndCompression) {
BasicTest(true, true);
}
TEST_F(CompressedSecondaryCacheTest, FailsTestWithNoCompression) {
FailsTest(false);
}
TEST_F(CompressedSecondaryCacheTest, FailsTestWithCompression) {
FailsTest(true);
}
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationTestWithNoCompression) {
BasicIntegrationTest(false);
}
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationTestWithCompression) {
BasicIntegrationTest(true);
}
TEST_F(CompressedSecondaryCacheTest,
BasicIntegrationFailTestWithNoCompression) {
BasicIntegrationFailTest(false);
}
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationFailTestWithCompression) {
BasicIntegrationFailTest(true);
}
TEST_F(CompressedSecondaryCacheTest, IntegrationSaveFailTestWithNoCompression) {
IntegrationSaveFailTest(false);
}
TEST_F(CompressedSecondaryCacheTest, IntegrationSaveFailTestWithCompression) {
IntegrationSaveFailTest(true);
}
TEST_F(CompressedSecondaryCacheTest,
IntegrationCreateFailTestWithNoCompression) {
IntegrationCreateFailTest(false);
}
TEST_F(CompressedSecondaryCacheTest, IntegrationCreateFailTestWithCompression) {
IntegrationCreateFailTest(true);
}
TEST_F(CompressedSecondaryCacheTest,
IntegrationFullCapacityTestWithNoCompression) {
IntegrationFullCapacityTest(false);
}
TEST_F(CompressedSecondaryCacheTest,
IntegrationFullCapacityTestWithCompression) {
IntegrationFullCapacityTest(true);
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
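The integration tests above rely on the secondary-cache-aware overload of Cache::Lookup, which takes the item helper, a create callback used to materialize a value promoted from the secondary cache, a priority, and a wait flag. A condensed sketch of one such call; `cache`, `helper`, and `create_cb` are assumed to exist, standing in for the tests' cache, helper_, and test_item_creator:

// Hypothetical fragment mirroring the Lookup calls in the tests above.
Cache::Handle* h = cache->Lookup("k1", &helper, create_cb,
                                 Cache::Priority::LOW, /*wait=*/true);
if (h != nullptr) {
  // On a secondary-cache hit, create_cb has rebuilt the value before return.
  void* v = cache->Value(h);
  (void)v;
  cache->Release(h);
}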

cache/fast_lru_cache.cc

@ -1,511 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/fast_lru_cache.h"
#include <cassert>
#include <cstdint>
#include <cstdio>
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "port/lang.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
namespace fast_lru_cache {
LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
: length_bits_(/* historical starting size*/ 4),
list_(new LRUHandle* [size_t{1} << length_bits_] {}),
elems_(0),
max_length_bits_(max_upper_hash_bits) {}
LRUHandleTable::~LRUHandleTable() {
ApplyToEntriesRange(
[](LRUHandle* h) {
if (!h->HasRefs()) {
h->Free();
}
},
0, uint32_t{1} << length_bits_);
}
LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
return *FindPointer(key, hash);
}
LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
LRUHandle** ptr = FindPointer(h->key(), h->hash);
LRUHandle* old = *ptr;
h->next_hash = (old == nullptr ? nullptr : old->next_hash);
*ptr = h;
if (old == nullptr) {
++elems_;
if ((elems_ >> length_bits_) > 0) { // elems_ >= length
// Since each cache entry is fairly large, we aim for a small
// average linked list length (<= 1).
Resize();
}
}
return old;
}
LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
LRUHandle** ptr = FindPointer(key, hash);
LRUHandle* result = *ptr;
if (result != nullptr) {
*ptr = result->next_hash;
--elems_;
}
return result;
}
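// FindPointer() returns the address of the pointer slot that refers to the
// handle matching key/hash, or the address of the trailing null slot if no
// match exists. Buckets are indexed by the top length_bits_ bits of the
// 32-bit hash, and collisions chain through LRUHandle::next_hash; returning
// a slot address lets Insert() and Remove() splice the chain in place.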
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
LRUHandle** ptr = &list_[hash >> (32 - length_bits_)];
while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
ptr = &(*ptr)->next_hash;
}
return ptr;
}
void LRUHandleTable::Resize() {
if (length_bits_ >= max_length_bits_) {
// Due to reaching limit of hash information, if we made the table bigger,
// we would allocate more addresses but only the same number would be used.
return;
}
if (length_bits_ >= 31) {
// Avoid undefined behavior shifting uint32_t by 32.
return;
}
uint32_t old_length = uint32_t{1} << length_bits_;
int new_length_bits = length_bits_ + 1;
std::unique_ptr<LRUHandle* []> new_list {
new LRUHandle* [size_t{1} << new_length_bits] {}
};
uint32_t count = 0;
for (uint32_t i = 0; i < old_length; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
LRUHandle* next = h->next_hash;
uint32_t hash = h->hash;
LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)];
h->next_hash = *ptr;
*ptr = h;
h = next;
count++;
}
}
assert(elems_ == count);
list_ = std::move(new_list);
length_bits_ = new_length_bits;
}
LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits)
: capacity_(0),
strict_capacity_limit_(strict_capacity_limit),
table_(max_upper_hash_bits),
usage_(0),
lru_usage_(0) {
set_metadata_charge_policy(metadata_charge_policy);
// Make empty circular linked list.
lru_.next = &lru_;
lru_.prev = &lru_;
lru_low_pri_ = &lru_;
SetCapacity(capacity);
}
void LRUCacheShard::EraseUnRefEntries() {
autovector<LRUHandle*> last_reference_list;
{
MutexLock l(&mutex_);
while (lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference_list.push_back(old);
}
}
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
entry->Free();
}
}
void LRUCacheShard::ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) {
// The state is essentially going to be the starting hash, which works
// nicely even if we resize between calls because we use upper-most
// hash bits for table indexes.
MutexLock l(&mutex_);
uint32_t length_bits = table_.GetLengthBits();
uint32_t length = uint32_t{1} << length_bits;
assert(average_entries_per_lock > 0);
// Assuming we are called with same average_entries_per_lock repeatedly,
// this simplifies some logic (index_end will not overflow).
assert(average_entries_per_lock < length || *state == 0);
uint32_t index_begin = *state >> (32 - length_bits);
uint32_t index_end = index_begin + average_entries_per_lock;
if (index_end >= length) {
// Going to end
index_end = length;
*state = UINT32_MAX;
} else {
*state = index_end << (32 - length_bits);
}
table_.ApplyToEntriesRange(
[callback](LRUHandle* h) {
callback(h->key(), h->value, h->charge, h->deleter);
},
index_begin, index_end);
}
void LRUCacheShard::LRU_Remove(LRUHandle* e) {
assert(e->next != nullptr);
assert(e->prev != nullptr);
e->next->prev = e->prev;
e->prev->next = e->next;
e->prev = e->next = nullptr;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(lru_usage_ >= total_charge);
lru_usage_ -= total_charge;
}
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
assert(e->next == nullptr);
assert(e->prev == nullptr);
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
// Inset "e" to head of LRU list.
e->next = &lru_;
e->prev = lru_.prev;
e->prev->next = e;
e->next->prev = e;
lru_usage_ += total_charge;
}
void LRUCacheShard::EvictFromLRU(size_t charge,
autovector<LRUHandle*>* deleted) {
while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
deleted->push_back(old);
}
}
void LRUCacheShard::SetCapacity(size_t capacity) {
autovector<LRUHandle*> last_reference_list;
{
MutexLock l(&mutex_);
capacity_ = capacity;
EvictFromLRU(0, &last_reference_list);
}
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
entry->Free();
}
}
void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
MutexLock l(&mutex_);
strict_capacity_limit_ = strict_capacity_limit;
}
Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
bool free_handle_on_fail) {
Status s = Status::OK();
autovector<LRUHandle*> last_reference_list;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
{
MutexLock l(&mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty.
EvictFromLRU(total_charge, &last_reference_list);
if ((usage_ + total_charge) > capacity_ &&
(strict_capacity_limit_ || handle == nullptr)) {
e->SetInCache(false);
if (handle == nullptr) {
// Don't insert the entry but still return ok, as if the entry were
// inserted into the cache and then evicted immediately.
last_reference_list.push_back(e);
} else {
if (free_handle_on_fail) {
delete[] reinterpret_cast<char*>(e);
*handle = nullptr;
}
s = Status::Incomplete("Insert failed due to LRU cache being full.");
}
} else {
// Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up.
LRUHandle* old = table_.Insert(e);
usage_ += total_charge;
if (old != nullptr) {
s = Status::OkOverwritten();
assert(old->InCache());
old->SetInCache(false);
if (!old->HasRefs()) {
// old is on LRU because it's in cache and its reference count is 0.
LRU_Remove(old);
size_t old_total_charge =
old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
last_reference_list.push_back(old);
}
}
if (handle == nullptr) {
LRU_Insert(e);
} else {
// If caller already holds a ref, no need to take one here.
if (!e->HasRefs()) {
e->Ref();
}
*handle = reinterpret_cast<Cache::Handle*>(e);
}
}
}
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
entry->Free();
}
return s;
}
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
LRUHandle* e = nullptr;
{
MutexLock l(&mutex_);
e = table_.Lookup(key, hash);
if (e != nullptr) {
assert(e->InCache());
if (!e->HasRefs()) {
// The entry is on the LRU list since it's in the hash table and has no
// external references.
LRU_Remove(e);
}
e->Ref();
}
}
return reinterpret_cast<Cache::Handle*>(e);
}
bool LRUCacheShard::Ref(Cache::Handle* h) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
MutexLock l(&mutex_);
// To create another reference - entry must be already externally referenced.
assert(e->HasRefs());
e->Ref();
return true;
}
bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
if (handle == nullptr) {
return false;
}
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
bool last_reference = false;
{
MutexLock l(&mutex_);
last_reference = e->Unref();
if (last_reference && e->InCache()) {
// The item is still in cache, and nobody else holds a reference to it.
if (usage_ > capacity_ || erase_if_last_ref) {
// The LRU list must be empty since the cache is full.
assert(lru_.next == &lru_ || erase_if_last_ref);
// Take this opportunity and remove the item.
table_.Remove(e->key(), e->hash);
e->SetInCache(false);
} else {
// Put the item back on the LRU list, and don't free it.
LRU_Insert(e);
last_reference = false;
}
}
// If it was the last reference, then decrement the cache usage.
if (last_reference) {
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
}
}
// Free the entry here outside of mutex for performance reasons.
if (last_reference) {
e->Free();
}
return last_reference;
}
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
size_t charge, Cache::DeleterFn deleter,
Cache::Handle** handle,
Cache::Priority /*priority*/) {
// Allocate the memory here outside of the mutex.
// If the cache is full, we'll have to release it.
// It shouldn't happen very often though.
LRUHandle* e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
e->value = value;
e->flags = 0;
e->deleter = deleter;
e->charge = charge;
e->key_length = key.size();
e->hash = hash;
e->refs = 0;
e->next = e->prev = nullptr;
e->SetInCache(true);
memcpy(e->key_data, key.data(), key.size());
return InsertItem(e, handle, /* free_handle_on_fail */ true);
}
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
LRUHandle* e;
bool last_reference = false;
{
MutexLock l(&mutex_);
e = table_.Remove(key, hash);
if (e != nullptr) {
assert(e->InCache());
e->SetInCache(false);
if (!e->HasRefs()) {
// The entry is on the LRU list since it's in the hash table and has no
// external references.
LRU_Remove(e);
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference = true;
}
}
}
// Free the entry here outside of mutex for performance reasons.
// last_reference will only be true if e != nullptr.
if (last_reference) {
e->Free();
}
}
size_t LRUCacheShard::GetUsage() const {
MutexLock l(&mutex_);
return usage_;
}
size_t LRUCacheShard::GetPinnedUsage() const {
MutexLock l(&mutex_);
assert(usage_ >= lru_usage_);
return usage_ - lru_usage_;
}
std::string LRUCacheShard::GetPrintableOptions() const { return std::string{}; }
LRUCache::LRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
num_shards_ = 1 << num_shard_bits;
shards_ = reinterpret_cast<LRUCacheShard*>(
port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
for (int i = 0; i < num_shards_; i++) {
new (&shards_[i])
LRUCacheShard(per_shard, strict_capacity_limit, metadata_charge_policy,
/* max_upper_hash_bits */ 32 - num_shard_bits);
}
}
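The constructor above splits the capacity across shards with a ceiling division; a quick worked example with assumed numbers:
#include <cassert>
#include <cstddef>
int main() {
  const int num_shard_bits = 4;
  const size_t num_shards = size_t{1} << num_shard_bits;  // 16 shards
  const size_t capacity = 100;
  // Round up so the shards together never undershoot the requested capacity.
  const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
  assert(per_shard == 7);                      // (100 + 15) / 16 == 7
  assert(per_shard * num_shards >= capacity);  // 112 >= 100
  return 0;
}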
LRUCache::~LRUCache() {
if (shards_ != nullptr) {
assert(num_shards_ > 0);
for (int i = 0; i < num_shards_; i++) {
shards_[i].~LRUCacheShard();
}
port::cacheline_aligned_free(shards_);
}
}
CacheShard* LRUCache::GetShard(uint32_t shard) {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
const CacheShard* LRUCache::GetShard(uint32_t shard) const {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
void* LRUCache::Value(Handle* handle) {
return reinterpret_cast<const LRUHandle*>(handle)->value;
}
size_t LRUCache::GetCharge(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->charge;
}
Cache::DeleterFn LRUCache::GetDeleter(Handle* handle) const {
auto h = reinterpret_cast<const LRUHandle*>(handle);
return h->deleter;
}
uint32_t LRUCache::GetHash(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->hash;
}
void LRUCache::DisownData() {
// Leak data only if that won't generate an ASAN/valgrind warning.
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
num_shards_ = 0;
}
}
} // namespace fast_lru_cache
std::shared_ptr<Cache> NewFastLRUCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
if (num_shard_bits >= 20) {
return nullptr; // The cache cannot be sharded into too many fine pieces.
}
if (num_shard_bits < 0) {
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<fast_lru_cache::LRUCache>(
capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
}
} // namespace ROCKSDB_NAMESPACE
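For orientation, a hedged usage sketch of the factory above, via the generic Cache interface. It assumes the NewFastLRUCache declaration from cache/fast_lru_cache.h (shown below) and the long-standing rocksdb/cache.h Insert/Lookup/Release signatures; it is illustrative and not part of this diff:
#include <cassert>
#include <memory>
#include <string>
#include "cache/fast_lru_cache.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
using ROCKSDB_NAMESPACE::Cache;
using ROCKSDB_NAMESPACE::NewFastLRUCache;
using ROCKSDB_NAMESPACE::Slice;
using ROCKSDB_NAMESPACE::Status;
int main() {
  std::shared_ptr<Cache> cache = NewFastLRUCache(1024 * 1024);  // 1 MB
  auto* value = new std::string("payload");
  Cache::Handle* handle = nullptr;
  Status s = cache->Insert(
      "key", value, value->size(),
      [](const Slice& /*k*/, void* v) { delete static_cast<std::string*>(v); },
      &handle);
  assert(s.ok());
  cache->Release(handle);  // entry goes back on the LRU list
  handle = cache->Lookup("key");
  if (handle != nullptr) {
    assert(*static_cast<std::string*>(cache->Value(handle)) == "payload");
    cache->Release(handle);
  }
  return 0;
}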

cache/fast_lru_cache.h

@ -1,299 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
#include <string>
#include "cache/sharded_cache.h"
#include "port/lang.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/secondary_cache.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
namespace fast_lru_cache {
// An experimental (under development!) alternative to LRUCache
struct LRUHandle {
void* value;
Cache::DeleterFn deleter;
LRUHandle* next_hash;
LRUHandle* next;
LRUHandle* prev;
size_t charge; // TODO(opt): Only allow uint32_t?
size_t key_length;
// The hash of key(). Used for fast sharding and comparisons.
uint32_t hash;
// The number of external refs to this entry. The cache itself is not counted.
uint32_t refs;
enum Flags : uint8_t {
// Whether this entry is referenced by the hash table.
IN_CACHE = (1 << 0),
};
uint8_t flags;
// Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
char key_data[1];
Slice key() const { return Slice(key_data, key_length); }
// Increase the reference count by 1.
void Ref() { refs++; }
// Just reduce the reference count by 1. Return true if it was the last
// reference.
bool Unref() {
assert(refs > 0);
refs--;
return refs == 0;
}
// Return true if there are external refs, false otherwise.
bool HasRefs() const { return refs > 0; }
bool InCache() const { return flags & IN_CACHE; }
void SetInCache(bool in_cache) {
if (in_cache) {
flags |= IN_CACHE;
} else {
flags &= ~IN_CACHE;
}
}
void Free() {
assert(refs == 0);
if (deleter) {
(*deleter)(key(), value);
}
delete[] reinterpret_cast<char*>(this);
}
// Calculate the memory usage by metadata.
inline size_t CalcTotalCharge(
CacheMetadataChargePolicy metadata_charge_policy) {
size_t meta_charge = 0;
if (metadata_charge_policy == kFullChargeCacheMetadata) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
meta_charge += malloc_usable_size(static_cast<void*>(this));
#else
// This is the size that is used when a new handle is created.
meta_charge += sizeof(LRUHandle) - 1 + key_length;
#endif
}
return charge + meta_charge;
}
};
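A worked example of the fallback metadata charge above. The sizeof value is hypothetical; real handles embed the key inline, hence the `- 1` for the one-byte key_data placeholder:
#include <cassert>
#include <cstddef>
int main() {
  const size_t sizeof_handle = 72;  // hypothetical sizeof(LRUHandle)
  const size_t key_length = 16;
  const size_t charge = 100;        // caller-declared value size
  // kFullChargeCacheMetadata without malloc_usable_size: struct + inline key.
  const size_t meta_charge = sizeof_handle - 1 + key_length;  // 87
  assert(charge + meta_charge == 187);
  return 0;
}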
// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
// we have tested. E.g., readrandom speeds up by ~5% over g++
// 4.4.3's builtin hashtable.
class LRUHandleTable {
public:
// If the table uses more hash bits than `max_upper_hash_bits`,
// it will eat into the bits used for sharding, which are constant
// for a given LRUHandleTable.
explicit LRUHandleTable(int max_upper_hash_bits);
~LRUHandleTable();
LRUHandle* Lookup(const Slice& key, uint32_t hash);
LRUHandle* Insert(LRUHandle* h);
LRUHandle* Remove(const Slice& key, uint32_t hash);
template <typename T>
void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
for (uint32_t i = index_begin; i < index_end; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
auto n = h->next_hash;
assert(h->InCache());
func(h);
h = n;
}
}
}
int GetLengthBits() const { return length_bits_; }
private:
// Return a pointer to slot that points to a cache entry that
// matches key/hash. If there is no such cache entry, return a
// pointer to the trailing slot in the corresponding linked list.
LRUHandle** FindPointer(const Slice& key, uint32_t hash);
void Resize();
// Number of hash bits (upper because lower bits used for sharding)
// used for table index. Length == 1 << length_bits_
int length_bits_;
// The table consists of an array of buckets where each bucket is
// a linked list of cache entries that hash into the bucket.
std::unique_ptr<LRUHandle*[]> list_;
// Number of elements currently in the table.
uint32_t elems_;
// Set from max_upper_hash_bits (see constructor).
const int max_length_bits_;
};
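The FindPointer contract documented above enables splice-by-slot updates without back pointers. A simplified, self-contained illustration (integer keys, a single bucket, hypothetical Node type):
#include <cassert>
struct Node {
  int key;
  Node* next_hash;
};
// Return the slot that points to the matching node, or the trailing null
// slot if there is no match -- mirroring FindPointer above.
Node** FindSlot(Node** bucket, int key) {
  Node** ptr = bucket;
  while (*ptr != nullptr && (*ptr)->key != key) {
    ptr = &(*ptr)->next_hash;
  }
  return ptr;
}
int main() {
  Node a{1, nullptr}, b{2, nullptr};
  Node* bucket = &a;
  a.next_hash = &b;
  Node** slot = FindSlot(&bucket, 2);
  assert(*slot == &b);
  *slot = b.next_hash;  // unlink with a single store, no prev pointer needed
  assert(a.next_hash == nullptr);
  return 0;
}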
// A single shard of sharded cache.
class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
public:
LRUCacheShard(size_t capacity, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits);
~LRUCacheShard() override = default;
// Separate from constructor so caller can easily make an array of LRUCache
// if current usage is more than new capacity, the function will attempt to
// free the needed space.
void SetCapacity(size_t capacity) override;
// Set the flag to reject insertion if the cache is full.
void SetStrictCapacityLimit(bool strict_capacity_limit) override;
// Like Cache methods, but with an extra "hash" parameter.
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
Cache::DeleterFn deleter, Cache::Handle** handle,
Cache::Priority priority) override;
Status Insert(const Slice& key, uint32_t hash, void* value,
const Cache::CacheItemHelper* helper, size_t charge,
Cache::Handle** handle, Cache::Priority priority) override {
return Insert(key, hash, value, charge, helper->del_cb, handle, priority);
}
Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const Cache::CacheItemHelper* /*helper*/,
const Cache::CreateCallback& /*create_cb*/,
Cache::Priority /*priority*/, bool /*wait*/,
Statistics* /*stats*/) override {
return Lookup(key, hash);
}
Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
bool Release(Cache::Handle* handle, bool /*useful*/,
bool erase_if_last_ref) override {
return Release(handle, erase_if_last_ref);
}
bool IsReady(Cache::Handle* /*handle*/) override { return true; }
void Wait(Cache::Handle* /*handle*/) override {}
bool Ref(Cache::Handle* handle) override;
bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
void Erase(const Slice& key, uint32_t hash) override;
size_t GetUsage() const override;
size_t GetPinnedUsage() const override;
void ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) override;
void EraseUnRefEntries() override;
std::string GetPrintableOptions() const override;
private:
friend class LRUCache;
// Insert an item into the hash table and, if handle is null, insert into
// the LRU list. Older items are evicted as necessary. If the cache is full
// and free_handle_on_fail is true, the item is deleted and handle is set to
// nullptr.
Status InsertItem(LRUHandle* item, Cache::Handle** handle,
bool free_handle_on_fail);
void LRU_Remove(LRUHandle* e);
void LRU_Insert(LRUHandle* e);
// Free some space following strict LRU policy until enough space
// to hold (usage_ + charge) is freed or the lru list is empty
// This function is not thread safe - it needs to be executed while
// holding the mutex_.
void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
// Initialized before use.
size_t capacity_;
// Whether to reject insertion if cache reaches its full capacity.
bool strict_capacity_limit_;
// Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry.
// LRU contains items which can be evicted, i.e., referenced only by the cache.
LRUHandle lru_;
// Pointer to head of low-pri pool in LRU list.
LRUHandle* lru_low_pri_;
// ------------^^^^^^^^^^^^^-----------
// Not frequently modified data members
// ------------------------------------
//
// We separate data members that are updated frequently from the ones that
// are not frequently updated so that they don't share the same cache line,
// which would lead to false sharing.
//
// ------------------------------------
// Frequently modified data members
// ------------vvvvvvvvvvvvv-----------
LRUHandleTable table_;
// Memory size for entries residing in the cache.
size_t usage_;
// Memory size for entries residing only in the LRU list.
size_t lru_usage_;
// mutex_ protects the following state.
// We don't count mutex_ as part of the cache's internal state, so
// semantically we don't mind const methods locking (and mutating) it.
mutable port::Mutex mutex_;
};
class LRUCache
#ifdef NDEBUG
final
#endif
: public ShardedCache {
public:
LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata);
~LRUCache() override;
const char* Name() const override { return "LRUCache"; }
CacheShard* GetShard(uint32_t shard) override;
const CacheShard* GetShard(uint32_t shard) const override;
void* Value(Handle* handle) override;
size_t GetCharge(Handle* handle) const override;
uint32_t GetHash(Handle* handle) const override;
DeleterFn GetDeleter(Handle* handle) const override;
void DisownData() override;
private:
LRUCacheShard* shards_ = nullptr;
int num_shards_ = 0;
};
} // namespace fast_lru_cache
std::shared_ptr<Cache> NewFastLRUCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false,
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy);
} // namespace ROCKSDB_NAMESPACE

cache/lru_cache.cc

@ -15,11 +15,9 @@
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "port/lang.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
namespace lru_cache {
LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
: length_bits_(/* historical starting size*/ 4),
@ -77,12 +75,13 @@ LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
void LRUHandleTable::Resize() {
if (length_bits_ >= max_length_bits_) {
// Due to reaching limit of hash information, if we made the table bigger,
// we would allocate more addresses but only the same number would be used.
// Due to reaching limit of hash information, if we made the table
// bigger, we would allocate more addresses but only the same
// number would be used.
return;
}
if (length_bits_ >= 31) {
// Avoid undefined behavior shifting uint32_t by 32.
// Avoid undefined behavior shifting uint32_t by 32
return;
}
@ -125,7 +124,7 @@ LRUCacheShard::LRUCacheShard(
mutex_(use_adaptive_mutex),
secondary_cache_(secondary_cache) {
set_metadata_charge_policy(metadata_charge_policy);
// Make empty circular linked list.
// Make empty circular linked list
lru_.next = &lru_;
lru_.prev = &lru_;
lru_low_pri_ = &lru_;
@ -138,7 +137,7 @@ void LRUCacheShard::EraseUnRefEntries() {
MutexLock l(&mutex_);
while (lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
// LRU list contains only elements which can be evicted
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
@ -168,7 +167,7 @@ void LRUCacheShard::ApplyToSomeEntries(
assert(average_entries_per_lock > 0);
// Assuming we are called with same average_entries_per_lock repeatedly,
// this simplifies some logic (index_end will not overflow).
// this simplifies some logic (index_end will not overflow)
assert(average_entries_per_lock < length || *state == 0);
uint32_t index_begin = *state >> (32 - length_bits);
@ -274,7 +273,7 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
autovector<LRUHandle*>* deleted) {
while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
// LRU list contains only elements which can be evicted
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
@ -295,11 +294,11 @@ void LRUCacheShard::SetCapacity(size_t capacity) {
EvictFromLRU(0, &last_reference_list);
}
// Try to insert the evicted entries into tiered cache.
// Free the entries outside of mutex for performance reasons.
// Try to insert the evicted entries into tiered cache
// Free the entries outside of mutex for performance reasons
for (auto entry : last_reference_list) {
if (secondary_cache_ && entry->IsSecondaryCacheCompatible() &&
!entry->IsInSecondaryCache()) {
!entry->IsPromoted()) {
secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper)
.PermitUncheckedError();
}
@ -322,7 +321,7 @@ Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
MutexLock l(&mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty.
// is freed or the lru list is empty
EvictFromLRU(total_charge, &last_reference_list);
if ((usage_ + total_charge) > capacity_ &&
@ -349,7 +348,7 @@ Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
assert(old->InCache());
old->SetInCache(false);
if (!old->HasRefs()) {
// old is on LRU because it's in cache and its reference count is 0.
// old is on LRU because it's in cache and its reference count is 0
LRU_Remove(old);
size_t old_total_charge =
old->CalcTotalCharge(metadata_charge_policy_);
@ -361,7 +360,7 @@ Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
if (handle == nullptr) {
LRU_Insert(e);
} else {
// If caller already holds a ref, no need to take one here.
// If caller already holds a ref, no need to take one here
if (!e->HasRefs()) {
e->Ref();
}
@ -370,11 +369,11 @@ Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
}
}
// Try to insert the evicted entries into the secondary cache.
// Free the entries here outside of mutex for performance reasons.
// Try to insert the evicted entries into the secondary cache
// Free the entries here outside of mutex for performance reasons
for (auto entry : last_reference_list) {
if (secondary_cache_ && entry->IsSecondaryCacheCompatible() &&
!entry->IsInSecondaryCache()) {
!entry->IsPromoted()) {
secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper)
.PermitUncheckedError();
}
@ -390,6 +389,7 @@ void LRUCacheShard::Promote(LRUHandle* e) {
assert(secondary_handle->IsReady());
e->SetIncomplete(false);
e->SetInCache(true);
e->SetPromoted(true);
e->value = secondary_handle->Value();
e->charge = secondary_handle->Size();
delete secondary_handle;
@ -403,7 +403,7 @@ void LRUCacheShard::Promote(LRUHandle* e) {
Status s = InsertItem(e, &handle, /*free_handle_on_fail=*/false);
if (!s.ok()) {
// Item is in memory, but not accounted against the cache capacity.
// When the handle is released, the item should get deleted.
// When the handle is released, the item should get deleted
assert(!e->InCache());
}
} else {
@ -436,8 +436,8 @@ Cache::Handle* LRUCacheShard::Lookup(
}
// If handle table lookup failed, then allocate a handle outside the
// mutex if we're going to lookup in the secondary cache.
// Only support synchronous for now.
// mutex if we're going to lookup in the secondary cache
// Only support synchronous for now
// TODO: Support asynchronous lookup in secondary cache
if (!e && secondary_cache_ && helper && helper->saveto_cb) {
// For objects from the secondary cache, we expect the caller to provide
@ -446,9 +446,8 @@ Cache::Handle* LRUCacheShard::Lookup(
// accounting purposes, which we won't demote to the secondary cache
// anyway.
assert(create_cb && helper->del_cb);
bool is_in_sec_cache{false};
std::unique_ptr<SecondaryCacheResultHandle> secondary_handle =
secondary_cache_->Lookup(key, create_cb, wait, is_in_sec_cache);
secondary_cache_->Lookup(key, create_cb, wait);
if (secondary_handle != nullptr) {
e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
@ -468,9 +467,8 @@ Cache::Handle* LRUCacheShard::Lookup(
if (wait) {
Promote(e);
e->SetIsInSecondaryCache(is_in_sec_cache);
if (!e->value) {
// The secondary cache returned a handle, but the lookup failed.
// The secondary cache returned a handle, but the lookup failed
e->Unref();
e->Free();
e = nullptr;
@ -480,9 +478,8 @@ Cache::Handle* LRUCacheShard::Lookup(
}
} else {
// If wait is false, we always return a handle and let the caller
// release the handle after checking for success or failure.
// release the handle after checking for success or failure
e->SetIncomplete(true);
e->SetIsInSecondaryCache(is_in_sec_cache);
// This may be slightly inaccurate, if the lookup eventually fails.
// But the probability is very low.
PERF_COUNTER_ADD(secondary_cache_hit_count, 1);
@ -496,7 +493,7 @@ Cache::Handle* LRUCacheShard::Lookup(
bool LRUCacheShard::Ref(Cache::Handle* h) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
MutexLock l(&mutex_);
// To create another reference - entry must be already externally referenced.
// To create another reference - entry must be already externally referenced
assert(e->HasRefs());
e->Ref();
return true;
@ -509,7 +506,7 @@ void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
MaintainPoolSize();
}
bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
if (handle == nullptr) {
return false;
}
@ -519,15 +516,15 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
MutexLock l(&mutex_);
last_reference = e->Unref();
if (last_reference && e->InCache()) {
// The item is still in cache, and nobody else holds a reference to it.
if (usage_ > capacity_ || erase_if_last_ref) {
// The LRU list must be empty since the cache is full.
assert(lru_.next == &lru_ || erase_if_last_ref);
// Take this opportunity and remove the item.
// The item is still in cache, and nobody else holds a reference to it
if (usage_ > capacity_ || force_erase) {
// The LRU list must be empty since the cache is full
assert(lru_.next == &lru_ || force_erase);
// Take this opportunity and remove the item
table_.Remove(e->key(), e->hash);
e->SetInCache(false);
} else {
// Put the item back on the LRU list, and don't free it.
// Put the item back on the LRU list, and don't free it
LRU_Insert(e);
last_reference = false;
}
@ -544,7 +541,7 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
}
}
// Free the entry here outside of mutex for performance reasons.
// Free the entry here outside of mutex for performance reasons
if (last_reference) {
e->Free();
}
@ -556,8 +553,8 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
void (*deleter)(const Slice& key, void* value),
const Cache::CacheItemHelper* helper,
Cache::Handle** handle, Cache::Priority priority) {
// Allocate the memory here outside of the mutex.
// If the cache is full, we'll have to release it.
// Allocate the memory here outside of the mutex
// If the cache is full, we'll have to release it
// It shouldn't happen very often though.
LRUHandle* e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
@ -605,8 +602,8 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
}
}
// Free the entry here outside of mutex for performance reasons.
// last_reference will only be true if e != nullptr.
// Free the entry here outside of mutex for performance reasons
// last_reference will only be true if e != nullptr
if (last_reference) {
e->Free();
}
@ -707,11 +704,11 @@ uint32_t LRUCache::GetHash(Handle* handle) const {
}
void LRUCache::DisownData() {
// Leak data only if that won't generate an ASAN/valgrind warning.
if (!kMustFreeHeapAllocations) {
// Do not drop data if compile with ASAN to suppress leak warning.
#ifndef MUST_FREE_HEAP_ALLOCATIONS
shards_ = nullptr;
num_shards_ = 0;
}
#endif
}
size_t LRUCache::TEST_GetLRUSize() {
@ -760,8 +757,6 @@ void LRUCache::WaitAll(std::vector<Handle*>& handles) {
}
}
} // namespace lru_cache
std::shared_ptr<Cache> NewLRUCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
@ -769,10 +764,10 @@ std::shared_ptr<Cache> NewLRUCache(
CacheMetadataChargePolicy metadata_charge_policy,
const std::shared_ptr<SecondaryCache>& secondary_cache) {
if (num_shard_bits >= 20) {
return nullptr; // The cache cannot be sharded into too many fine pieces.
return nullptr; // the cache cannot be sharded into too many fine pieces
}
if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
// Invalid high_pri_pool_ratio
// invalid high_pri_pool_ratio
return nullptr;
}
if (num_shard_bits < 0) {

cache/lru_cache.h

@ -19,7 +19,6 @@
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
namespace lru_cache {
// LRU cache implementation. This class is not thread-safe.
@ -82,12 +81,12 @@ struct LRUHandle {
IN_HIGH_PRI_POOL = (1 << 2),
// Whether this entry has had any lookups (hits).
HAS_HIT = (1 << 3),
// Can this be inserted into the secondary cache.
// Can this be inserted into the secondary cache
IS_SECONDARY_CACHE_COMPATIBLE = (1 << 4),
// Is the handle still being read from a lower tier.
// Is the handle still being read from a lower tier
IS_PENDING = (1 << 5),
// Whether this handle is still in a lower tier
IS_IN_SECONDARY_CACHE = (1 << 6),
// Has the item been promoted from a lower tier
IS_PROMOTED = (1 << 6),
};
uint8_t flags;
@ -130,7 +129,7 @@ struct LRUHandle {
#endif // __SANITIZE_THREAD__
}
bool IsPending() const { return flags & IS_PENDING; }
bool IsInSecondaryCache() const { return flags & IS_IN_SECONDARY_CACHE; }
bool IsPromoted() const { return flags & IS_PROMOTED; }
void SetInCache(bool in_cache) {
if (in_cache) {
@ -177,11 +176,11 @@ struct LRUHandle {
}
}
void SetIsInSecondaryCache(bool is_in_secondary_cache) {
if (is_in_secondary_cache) {
flags |= IS_IN_SECONDARY_CACHE;
void SetPromoted(bool promoted) {
if (promoted) {
flags |= IS_PROMOTED;
} else {
flags &= ~IS_IN_SECONDARY_CACHE;
flags &= ~IS_PROMOTED;
}
}
@ -209,7 +208,7 @@ struct LRUHandle {
delete[] reinterpret_cast<char*>(this);
}
// Calculate the memory usage by metadata.
// Calculate the memory usage by metadata
inline size_t CalcTotalCharge(
CacheMetadataChargePolicy metadata_charge_policy) {
size_t meta_charge = 0;
@ -217,7 +216,7 @@ struct LRUHandle {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
meta_charge += malloc_usable_size(static_cast<void*>(this));
#else
// This is the size that is used when a new handle is created.
// This is the size that is used when a new handle is created
meta_charge += sizeof(LRUHandle) - 1 + key_length;
#endif
}
@ -273,10 +272,10 @@ class LRUHandleTable {
// a linked list of cache entries that hash into the bucket.
std::unique_ptr<LRUHandle*[]> list_;
// Number of elements currently in the table.
// Number of elements currently in the table
uint32_t elems_;
// Set from max_upper_hash_bits (see constructor).
// Set from max_upper_hash_bits (see constructor)
const int max_length_bits_;
};
@ -292,7 +291,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
// Separate from constructor so caller can easily make an array of LRUCache
// if current usage is more than new capacity, the function will attempt to
// free the needed space.
// free the needed space
virtual void SetCapacity(size_t capacity) override;
// Set the flag to reject insertion if cache if full.
@ -315,7 +314,8 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
assert(helper);
return Insert(key, hash, value, charge, nullptr, helper, handle, priority);
}
// If helper_cb is null, the values of the following arguments don't matter.
// If helper_cb is null, the values of the following arguments don't
// matter
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const ShardedCache::CacheItemHelper* helper,
const ShardedCache::CreateCallback& create_cb,
@ -326,14 +326,14 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
nullptr);
}
virtual bool Release(Cache::Handle* handle, bool /*useful*/,
bool erase_if_last_ref) override {
return Release(handle, erase_if_last_ref);
bool force_erase) override {
return Release(handle, force_erase);
}
virtual bool IsReady(Cache::Handle* /*handle*/) override;
virtual void Wait(Cache::Handle* /*handle*/) override {}
virtual bool Ref(Cache::Handle* handle) override;
virtual bool Release(Cache::Handle* handle,
bool erase_if_last_ref = false) override;
bool force_erase = false) override;
virtual void Erase(const Slice& key, uint32_t hash) override;
// Although in some platforms the update of size_t is atomic, to make sure
@ -354,8 +354,8 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri);
// Retrieves number of elements in LRU, for unit test purpose only.
// Not threadsafe.
// Retrieves number of elements in LRU, for unit test purpose only
// not threadsafe
size_t TEST_GetLRUSize();
// Retrieves high pri pool ratio
@ -365,16 +365,14 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
friend class LRUCache;
// Insert an item into the hash table and, if handle is null, insert into
// the LRU list. Older items are evicted as necessary. If the cache is full
// and free_handle_on_fail is true, the item is deleted and handle is set to
// nullptr.
// and free_handle_on_fail is true, the item is deleted and handle is set to.
Status InsertItem(LRUHandle* item, Cache::Handle** handle,
bool free_handle_on_fail);
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
DeleterFn deleter, const Cache::CacheItemHelper* helper,
Cache::Handle** handle, Cache::Priority priority);
// Promote an item looked up from the secondary cache to the LRU cache.
// The item may be still in the secondary cache.
// It is only inserted into the hash table and not the LRU list, and only
// Promote an item looked up from the secondary cache to the LRU cache. The
// item is only inserted into the hash table and not the LRU list, and only
// if the cache is not at full capacity, as is the case during Insert. The
// caller should hold a reference on the LRUHandle. When the caller releases
// the last reference, the item is added to the LRU list.
@ -391,7 +389,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
// Free some space following strict LRU policy until enough space
// to hold (usage_ + charge) is freed or the lru list is empty
// This function is not thread safe - it needs to be executed while
// holding the mutex_.
// holding the mutex_
void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
// Initialized before use.
@ -431,10 +429,10 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
// ------------vvvvvvvvvvvvv-----------
LRUHandleTable table_;
// Memory size for entries residing in the cache.
// Memory size for entries residing in the cache
size_t usage_;
// Memory size for entries residing only in the LRU list.
// Memory size for entries residing only in the LRU list
size_t lru_usage_;
// mutex_ protects the following state.
@ -469,9 +467,9 @@ class LRUCache
virtual void DisownData() override;
virtual void WaitAll(std::vector<Handle*>& handles) override;
// Retrieves number of elements in LRU, for unit test purpose only.
// Retrieves number of elements in LRU, for unit test purpose only
size_t TEST_GetLRUSize();
// Retrieves high pri pool ratio.
// Retrieves high pri pool ratio
double GetHighPriPoolRatio();
private:
@ -480,10 +478,4 @@ class LRUCache
std::shared_ptr<SecondaryCache> secondary_cache_;
};
} // namespace lru_cache
using LRUCache = lru_cache::LRUCache;
using LRUHandle = lru_cache::LRUHandle;
using LRUCacheShard = lru_cache::LRUCacheShard;
} // namespace ROCKSDB_NAMESPACE

File diff suppressed because it is too large


@ -104,15 +104,14 @@ bool ShardedCache::Ref(Handle* handle) {
return GetShard(Shard(hash))->Ref(handle);
}
bool ShardedCache::Release(Handle* handle, bool erase_if_last_ref) {
bool ShardedCache::Release(Handle* handle, bool force_erase) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->Release(handle, erase_if_last_ref);
return GetShard(Shard(hash))->Release(handle, force_erase);
}
bool ShardedCache::Release(Handle* handle, bool useful,
bool erase_if_last_ref) {
bool ShardedCache::Release(Handle* handle, bool useful, bool force_erase) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->Release(handle, useful, erase_if_last_ref);
return GetShard(Shard(hash))->Release(handle, useful, force_erase);
}
void ShardedCache::Erase(const Slice& key) {


@ -37,11 +37,11 @@ class CacheShard {
Cache::Priority priority, bool wait,
Statistics* stats) = 0;
virtual bool Release(Cache::Handle* handle, bool useful,
bool erase_if_last_ref) = 0;
bool force_erase) = 0;
virtual bool IsReady(Cache::Handle* handle) = 0;
virtual void Wait(Cache::Handle* handle) = 0;
virtual bool Ref(Cache::Handle* handle) = 0;
virtual bool Release(Cache::Handle* handle, bool erase_if_last_ref) = 0;
virtual bool Release(Cache::Handle* handle, bool force_erase) = 0;
virtual void Erase(const Slice& key, uint32_t hash) = 0;
virtual void SetCapacity(size_t capacity) = 0;
virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
@ -94,11 +94,11 @@ class ShardedCache : public Cache {
const CreateCallback& create_cb, Priority priority,
bool wait, Statistics* stats = nullptr) override;
virtual bool Release(Handle* handle, bool useful,
bool erase_if_last_ref = false) override;
bool force_erase = false) override;
virtual bool IsReady(Handle* handle) override;
virtual void Wait(Handle* handle) override;
virtual bool Ref(Handle* handle) override;
virtual bool Release(Handle* handle, bool erase_if_last_ref = false) override;
virtual bool Release(Handle* handle, bool force_erase = false) override;
virtual void Erase(const Slice& key) override;
virtual uint64_t NewId() override;
virtual size_t GetCapacity() const override;


@ -1,26 +0,0 @@
# - Find liburing
#
# uring_INCLUDE_DIR - Where to find liburing.h
# uring_LIBRARIES - List of libraries when using uring.
# uring_FOUND - True if uring found.
find_path(uring_INCLUDE_DIR
NAMES liburing.h)
find_library(uring_LIBRARIES
NAMES liburing.a liburing)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(uring
DEFAULT_MSG uring_LIBRARIES uring_INCLUDE_DIR)
mark_as_advanced(
uring_INCLUDE_DIR
uring_LIBRARIES)
if(uring_FOUND AND NOT TARGET uring::uring)
add_library(uring::uring UNKNOWN IMPORTED)
set_target_properties(uring::uring PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${uring_INCLUDE_DIR}"
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${uring_LIBRARIES}")
endif()


@ -1,30 +0,0 @@
ifndef PYTHON
# Default to python3. Some distros like CentOS 8 do not have `python`.
ifeq ($(origin PYTHON), undefined)
PYTHON := $(shell which python3 || which python || echo python3)
endif
export PYTHON
endif
# To set up the tmp directory, first recognize some old variables for setting
# the test tmp directory or base tmp directory. TEST_TMPDIR is usually read
# by RocksDB tools through Env/FileSystem::GetTestDirectory.
ifeq ($(TEST_TMPDIR),)
TEST_TMPDIR := $(TMPD)
endif
ifeq ($(TEST_TMPDIR),)
ifeq ($(BASE_TMPDIR),)
BASE_TMPDIR :=$(TMPDIR)
endif
ifeq ($(BASE_TMPDIR),)
BASE_TMPDIR :=/tmp
endif
# Use /dev/shm if it has the sticky bit set (otherwise, /tmp or other
# base dir), and create a randomly-named rocksdb.XXXX directory therein.
TEST_TMPDIR := $(shell f=/dev/shm; test -k $$f || f=$(BASE_TMPDIR); \
perl -le 'use File::Temp "tempdir";' \
-e 'print tempdir("'$$f'/rocksdb.XXXX", CLEANUP => 0)')
endif
export TEST_TMPDIR


@ -12,7 +12,7 @@ fi
ROOT=".."
# Fetch right version of gcov
if [ -d /mnt/gvfs/third-party -a -z "$CXX" ]; then
source $ROOT/build_tools/fbcode_config_platform009.sh
source $ROOT/build_tools/fbcode_config_platform007.sh
GCOV=$GCC_BASE/bin/gcov
else
GCOV=$(which gcov)
@ -24,7 +24,7 @@ mkdir -p $COVERAGE_DIR
# Find all gcno files to generate the coverage report
PYTHON=${1:-`which python3`}
PYTHON=${1:-`which python`}
echo -e "Using $PYTHON"
GCNO_FILES=`find $ROOT -name "*.gcno"`
$GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null |


@ -1,93 +0,0 @@
# This file is used by Meta-internal infrastructure as well as by Makefile
# When included from Makefile, there are rules to build DB_STRESS_CMD. When
# used directly with `make -f crashtest.mk ...` there will be no rules to
# build DB_STRESS_CMD so it must exist prior.
DB_STRESS_CMD?=./db_stress
include common.mk
CRASHTEST_MAKE=$(MAKE) -f crash_test.mk
CRASHTEST_PY=$(PYTHON) -u tools/db_crashtest.py --stress_cmd=$(DB_STRESS_CMD)
.PHONY: crash_test crash_test_with_atomic_flush crash_test_with_txn \
crash_test_with_best_efforts_recovery crash_test_with_ts \
blackbox_crash_test blackbox_crash_test_with_atomic_flush \
blackbox_crash_test_with_txn blackbox_crash_test_with_ts \
blackbox_crash_test_with_best_efforts_recovery \
whitebox_crash_test whitebox_crash_test_with_atomic_flush \
whitebox_crash_test_with_txn whitebox_crash_test_with_ts \
blackbox_crash_test_with_multiops_wc_txn \
blackbox_crash_test_with_multiops_wp_txn
crash_test: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test
$(CRASHTEST_MAKE) blackbox_crash_test
crash_test_with_atomic_flush: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test_with_atomic_flush
$(CRASHTEST_MAKE) blackbox_crash_test_with_atomic_flush
crash_test_with_txn: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test_with_txn
$(CRASHTEST_MAKE) blackbox_crash_test_with_txn
crash_test_with_best_efforts_recovery: blackbox_crash_test_with_best_efforts_recovery
crash_test_with_ts: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test_with_ts
$(CRASHTEST_MAKE) blackbox_crash_test_with_ts
crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
$(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wc_txn
crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
$(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wp_txn
blackbox_crash_test: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --simple blackbox $(CRASH_TEST_EXT_ARGS)
$(CRASHTEST_PY) blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_atomic_flush: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --cf_consistency blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --txn blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_best_efforts_recovery: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_best_efforts_recovery blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_ts: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --enable_ts blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_multiops_txn --write_policy write_committed blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS)
ifeq ($(CRASH_TEST_KILL_ODD),)
CRASH_TEST_KILL_ODD=888887
endif
whitebox_crash_test: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --simple whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
$(CRASHTEST_PY) whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
whitebox_crash_test_with_atomic_flush: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --cf_consistency whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
whitebox_crash_test_with_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --txn whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
whitebox_crash_test_with_ts: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --enable_ts whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)


@ -23,7 +23,7 @@ Status ArenaWrappedDBIter::GetProperty(std::string prop_name,
if (prop_name == "rocksdb.iterator.super-version-number") {
// First try to pass the value returned from inner iterator.
if (!db_iter_->GetProperty(prop_name, prop).ok()) {
*prop = std::to_string(sv_number_);
*prop = ToString(sv_number_);
}
return Status::OK();
}
@ -58,7 +58,6 @@ Status ArenaWrappedDBIter::Refresh() {
uint64_t cur_sv_number = cfd_->GetSuperVersionNumber();
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1");
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2");
while (true) {
if (sv_number_ != cur_sv_number) {
Env* env = db_iter_->env();
db_iter_->~DBIter();
@ -80,33 +79,9 @@ Status ArenaWrappedDBIter::Refresh() {
read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(),
latest_seq, /* allow_unprepared_value */ true);
SetIterUnderDBIter(internal_iter);
break;
} else {
SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
// Refresh range-tombstones in MemTable
if (!read_options_.ignore_range_deletions) {
SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_);
ReadRangeDelAggregator* range_del_agg =
db_iter_->GetRangeDelAggregator();
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter;
range_del_iter.reset(
sv->mem->NewRangeTombstoneIterator(read_options_, latest_seq));
range_del_agg->AddTombstones(std::move(range_del_iter));
cfd_->ReturnThreadLocalSuperVersion(sv);
}
// Refresh latest sequence number
db_iter_->set_sequence(latest_seq);
db_iter_->set_sequence(db_impl_->GetLatestSequenceNumber());
db_iter_->set_valid(false);
// Check again if the latest super version number is changed
uint64_t latest_sv_number = cfd_->GetSuperVersionNumber();
if (latest_sv_number != cur_sv_number) {
// If the super version number is changed after refreshing,
// fallback to Re-Init the InternalIterator
cur_sv_number = latest_sv_number;
continue;
}
break;
}
}
return Status::OK();
}
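The diff above reworks ArenaWrappedDBIter::Refresh around a retry loop keyed on the super version number. A hedged, standalone sketch of that pattern (hypothetical names; the real rebuild and refresh steps are elided):
#include <atomic>
#include <cassert>
std::atomic<unsigned> super_version_number{1};
void Refresh(unsigned* cached_sv) {
  unsigned cur = super_version_number.load();
  while (true) {
    if (*cached_sv != cur) {
      // Expensive path: rebuild the internal iterator against `cur`.
      *cached_sv = cur;
      return;
    }
    // Cheap path: refresh the sequence number / memtable tombstones, then
    // check whether a new super version was installed concurrently.
    unsigned latest = super_version_number.load();
    if (latest != cur) {
      cur = latest;  // lost the race; fall back to the rebuild path
      continue;
    }
    return;
  }
}
int main() {
  unsigned cached = 1;
  Refresh(&cached);  // takes the cheap path
  super_version_number.store(2);
  Refresh(&cached);  // takes the rebuild path
  assert(cached == 2);
  return 0;
}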


@ -12,6 +12,7 @@
#include <string>
#include "db/db_impl/db_impl.h"
#include "db/db_iter.h"
#include "db/dbformat.h"
#include "db/range_del_aggregator.h"
#include "memory/arena.h"
#include "options/cf_options.h"
@ -33,13 +34,7 @@ class Version;
// the same as the inner DBIter.
class ArenaWrappedDBIter : public Iterator {
public:
~ArenaWrappedDBIter() override {
if (db_iter_ != nullptr) {
db_iter_->~DBIter();
} else {
assert(false);
}
}
virtual ~ArenaWrappedDBIter() { db_iter_->~DBIter(); }
// Get the arena to be used to allocate memory for DBIter to be wrapped,
// as well as child iterators in it.
@ -95,7 +90,7 @@ class ArenaWrappedDBIter : public Iterator {
}
private:
DBIter* db_iter_ = nullptr;
DBIter* db_iter_;
Arena arena_;
uint64_t sv_number_;
ColumnFamilyData* cfd_ = nullptr;


@ -14,7 +14,6 @@
#include "db/dbformat.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/vector_iterator.h"
namespace ROCKSDB_NAMESPACE {
@ -68,7 +67,7 @@ TEST(BlobCountingIteratorTest, CountBlobs) {
assert(keys.size() == values.size());
VectorIterator input(keys, values);
test::VectorIterator input(keys, values);
BlobGarbageMeter blob_garbage_meter;
BlobCountingIterator blob_counter(&input, &blob_garbage_meter);
@ -308,7 +307,7 @@ TEST(BlobCountingIteratorTest, CorruptBlobIndex) {
assert(keys.size() == values.size());
VectorIterator input(keys, values);
test::VectorIterator input(keys, values);
BlobGarbageMeter blob_garbage_meter;
BlobCountingIterator blob_counter(&input, &blob_garbage_meter);


@ -9,26 +9,14 @@
namespace ROCKSDB_NAMESPACE {
Status BlobFetcher::FetchBlob(const Slice& user_key,
const Slice& blob_index_slice,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* blob_value,
uint64_t* bytes_read) const {
Status BlobFetcher::FetchBlob(const Slice& user_key, const Slice& blob_index,
PinnableSlice* blob_value) {
Status s;
assert(version_);
return version_->GetBlob(read_options_, user_key, blob_index_slice,
prefetch_buffer, blob_value, bytes_read);
}
Status BlobFetcher::FetchBlob(const Slice& user_key,
const BlobIndex& blob_index,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* blob_value,
uint64_t* bytes_read) const {
assert(version_);
return version_->GetBlob(read_options_, user_key, blob_index, prefetch_buffer,
blob_value, bytes_read);
constexpr uint64_t* bytes_read = nullptr;
s = version_->GetBlob(read_options_, user_key, blob_index, blob_value,
bytes_read);
return s;
}
} // namespace ROCKSDB_NAMESPACE


@ -9,29 +9,18 @@
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class Version;
class Slice;
class FilePrefetchBuffer;
class PinnableSlice;
class BlobIndex;
// A thin wrapper around the blob retrieval functionality of Version.
class BlobFetcher {
public:
BlobFetcher(const Version* version, const ReadOptions& read_options)
BlobFetcher(Version* version, const ReadOptions& read_options)
: version_(version), read_options_(read_options) {}
Status FetchBlob(const Slice& user_key, const Slice& blob_index_slice,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* blob_value, uint64_t* bytes_read) const;
Status FetchBlob(const Slice& user_key, const BlobIndex& blob_index,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* blob_value, uint64_t* bytes_read) const;
Status FetchBlob(const Slice& user_key, const Slice& blob_index,
PinnableSlice* blob_value);
private:
const Version* version_;
Version* version_;
ReadOptions read_options_;
};
} // namespace ROCKSDB_NAMESPACE


@ -12,7 +12,6 @@
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/blob/blob_log_writer.h"
#include "db/event_helpers.h"
#include "db/version_set.h"
#include "file/filename.h"
#include "file/read_write_util.h"
@ -37,14 +36,13 @@ BlobFileBuilder::BlobFileBuilder(
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions)
: BlobFileBuilder([versions]() { return versions->NewFileNumber(); }, fs,
immutable_options, mutable_cf_options, file_options,
job_id, column_family_id, column_family_name, io_priority,
write_hint, io_tracer, blob_callback, creation_reason,
blob_file_paths, blob_file_additions) {}
write_hint, io_tracer, blob_callback, blob_file_paths,
blob_file_additions) {}
BlobFileBuilder::BlobFileBuilder(
std::function<uint64_t()> file_number_generator, FileSystem* fs,
@ -55,7 +53,6 @@ BlobFileBuilder::BlobFileBuilder(
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions)
: file_number_generator_(std::move(file_number_generator)),
@ -72,7 +69,6 @@ BlobFileBuilder::BlobFileBuilder(
write_hint_(write_hint),
io_tracer_(io_tracer),
blob_callback_(blob_callback),
creation_reason_(creation_reason),
blob_file_paths_(blob_file_paths),
blob_file_additions_(blob_file_additions),
blob_count_(0),
@ -165,11 +161,6 @@ Status BlobFileBuilder::OpenBlobFileIfNeeded() {
std::string blob_file_path =
BlobFileName(immutable_options_->cf_paths.front().path, blob_file_number);
if (blob_callback_) {
blob_callback_->OnBlobFileCreationStarted(
blob_file_path, column_family_name_, job_id_, creation_reason_);
}
std::unique_ptr<FSWritableFile> file;
{
@ -314,13 +305,6 @@ Status BlobFileBuilder::CloseBlobFile() {
const uint64_t blob_file_number = writer_->get_log_number();
if (blob_callback_) {
s = blob_callback_->OnBlobFileCompleted(
blob_file_paths_->back(), column_family_name_, job_id_,
blob_file_number, creation_reason_, s, checksum_value, checksum_method,
blob_count_, blob_bytes_);
}
assert(blob_file_additions_);
blob_file_additions_->emplace_back(blob_file_number, blob_count_, blob_bytes_,
std::move(checksum_method),
@ -332,6 +316,9 @@ Status BlobFileBuilder::CloseBlobFile() {
" total blobs, %" PRIu64 " total bytes",
column_family_name_.c_str(), job_id_, blob_file_number,
blob_count_, blob_bytes_);
if (blob_callback_) {
s = blob_callback_->OnBlobFileCompleted(blob_file_paths_->back());
}
writer_.reset();
blob_count_ = 0;
@ -353,18 +340,15 @@ Status BlobFileBuilder::CloseBlobFileIfNeeded() {
return CloseBlobFile();
}
void BlobFileBuilder::Abandon(const Status& s) {
void BlobFileBuilder::Abandon() {
if (!IsBlobFileOpen()) {
return;
}
if (blob_callback_) {
// BlobFileBuilder::Abandon() is called because of error while writing to
// Blob files. So we can ignore the below error.
blob_callback_
->OnBlobFileCompleted(blob_file_paths_->back(), column_family_name_,
job_id_, writer_->get_log_number(),
creation_reason_, s, "", "", blob_count_,
blob_bytes_)
blob_callback_->OnBlobFileCompleted(blob_file_paths_->back())
.PermitUncheckedError();
}


@ -13,7 +13,6 @@
#include "rocksdb/compression_type.h"
#include "rocksdb/env.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/types.h"
namespace ROCKSDB_NAMESPACE {
@ -42,7 +41,6 @@ class BlobFileBuilder {
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions);
@ -56,7 +54,6 @@ class BlobFileBuilder {
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions);
@ -67,7 +64,7 @@ class BlobFileBuilder {
Status Add(const Slice& key, const Slice& value, std::string* blob_index);
Status Finish();
void Abandon(const Status& s);
void Abandon();
private:
bool IsBlobFileOpen() const;
@ -92,7 +89,6 @@ class BlobFileBuilder {
Env::WriteLifeTimeHint write_hint_;
std::shared_ptr<IOTracer> io_tracer_;
BlobFileCompletionCallback* blob_callback_;
BlobFileCreationReason creation_reason_;
std::vector<std::string>* blob_file_paths_;
std::vector<BlobFileAddition>* blob_file_additions_;
std::unique_ptr<BlobLogWriter> writer_;


@ -39,10 +39,9 @@ class TestFileNumberGenerator {
class BlobFileBuilderTest : public testing::Test {
protected:
BlobFileBuilderTest() {
mock_env_.reset(MockEnv::Create(Env::Default()));
fs_ = mock_env_->GetFileSystem().get();
clock_ = mock_env_->GetSystemClock().get();
BlobFileBuilderTest() : mock_env_(Env::Default()) {
fs_ = mock_env_.GetFileSystem().get();
clock_ = mock_env_.GetSystemClock().get();
}
void VerifyBlobFile(uint64_t blob_file_number,
@ -109,7 +108,7 @@ class BlobFileBuilderTest : public testing::Test {
ASSERT_EQ(footer.expiration_range, ExpirationRange());
}
std::unique_ptr<Env> mock_env_;
MockEnv mock_env_;
FileSystem* fs_;
SystemClock* clock_;
FileOptions file_options_;
@ -124,11 +123,11 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckOneFile) {
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileBuilderTest_BuildAndCheckOneFile"),
0);
options.enable_blob_files = true;
options.env = mock_env_.get();
options.env = &mock_env_;
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
@ -146,7 +145,7 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckOneFile) {
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
&blob_file_paths, &blob_file_additions);
std::vector<std::pair<std::string, std::string>> expected_key_value_pairs(
number_of_blobs);
@ -207,12 +206,12 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckMultipleFiles) {
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileBuilderTest_BuildAndCheckMultipleFiles"),
0);
options.enable_blob_files = true;
options.blob_file_size = value_size;
options.env = mock_env_.get();
options.env = &mock_env_;
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
@ -230,7 +229,7 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckMultipleFiles) {
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
&blob_file_paths, &blob_file_additions);
std::vector<std::pair<std::string, std::string>> expected_key_value_pairs(
number_of_blobs);
@ -294,12 +293,11 @@ TEST_F(BlobFileBuilderTest, InlinedValues) {
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileBuilderTest_InlinedValues"),
test::PerThreadDBPath(&mock_env_, "BlobFileBuilderTest_InlinedValues"),
0);
options.enable_blob_files = true;
options.min_blob_size = 1024;
options.env = mock_env_.get();
options.env = &mock_env_;
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
@ -317,7 +315,7 @@ TEST_F(BlobFileBuilderTest, InlinedValues) {
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
&blob_file_paths, &blob_file_additions);
for (size_t i = 0; i < number_of_blobs; ++i) {
const std::string key = std::to_string(i);
@ -349,11 +347,10 @@ TEST_F(BlobFileBuilderTest, Compression) {
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileBuilderTest_Compression"),
0);
test::PerThreadDBPath(&mock_env_, "BlobFileBuilderTest_Compression"), 0);
options.enable_blob_files = true;
options.blob_compression_type = kSnappyCompression;
options.env = mock_env_.get();
options.env = &mock_env_;
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
@ -371,7 +368,7 @@ TEST_F(BlobFileBuilderTest, Compression) {
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
&blob_file_paths, &blob_file_additions);
const std::string key("1");
const std::string uncompressed_value(value_size, 'x');
@ -432,12 +429,11 @@ TEST_F(BlobFileBuilderTest, CompressionError) {
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileBuilderTest_CompressionError"),
test::PerThreadDBPath(&mock_env_, "BlobFileBuilderTest_CompressionError"),
0);
options.enable_blob_files = true;
options.blob_compression_type = kSnappyCompression;
options.env = mock_env_.get();
options.env = &mock_env_;
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
@ -454,7 +450,7 @@ TEST_F(BlobFileBuilderTest, CompressionError) {
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
&blob_file_paths, &blob_file_additions);
SyncPoint::GetInstance()->SetCallBack("CompressData:TamperWithReturnValue",
[](void* arg) {
@ -510,12 +506,11 @@ TEST_F(BlobFileBuilderTest, Checksum) {
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileBuilderTest_Checksum"),
0);
test::PerThreadDBPath(&mock_env_, "BlobFileBuilderTest_Checksum"), 0);
options.enable_blob_files = true;
options.file_checksum_gen_factory =
std::make_shared<DummyFileChecksumGenFactory>();
options.env = mock_env_.get();
options.env = &mock_env_;
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
@ -533,7 +528,7 @@ TEST_F(BlobFileBuilderTest, Checksum) {
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
&blob_file_paths, &blob_file_additions);
const std::string key("1");
const std::string value("deadbeef");
@ -580,12 +575,12 @@ class BlobFileBuilderIOErrorTest
: public testing::Test,
public testing::WithParamInterface<std::string> {
protected:
BlobFileBuilderIOErrorTest() : sync_point_(GetParam()) {
mock_env_.reset(MockEnv::Create(Env::Default()));
fs_ = mock_env_->GetFileSystem().get();
}
BlobFileBuilderIOErrorTest()
: mock_env_(Env::Default()),
fs_(mock_env_.GetFileSystem().get()),
sync_point_(GetParam()) {}
std::unique_ptr<Env> mock_env_;
MockEnv mock_env_;
FileSystem* fs_;
FileOptions file_options_;
std::string sync_point_;
@ -607,12 +602,11 @@ TEST_P(BlobFileBuilderIOErrorTest, IOError) {
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileBuilderIOErrorTest_IOError"),
test::PerThreadDBPath(&mock_env_, "BlobFileBuilderIOErrorTest_IOError"),
0);
options.enable_blob_files = true;
options.blob_file_size = value_size;
options.env = mock_env_.get();
options.env = &mock_env_;
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
@ -630,7 +624,7 @@ TEST_P(BlobFileBuilderIOErrorTest, IOError) {
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
&blob_file_paths, &blob_file_additions);
SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* arg) {
Status* const s = static_cast<Status*>(arg);
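Note: throughout the builder tests above, MockEnv still exposes a public constructor taking the base Env on this branch, so the fixtures hold the mock environment by value instead of behind std::unique_ptr/MockEnv::Create(). A minimal sketch of the resulting fixture shape — ExampleBlobTest is an illustrative name, not a class in the tree:

#include "env/mock_env.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#include "test_util/testharness.h"

namespace ROCKSDB_NAMESPACE {

class ExampleBlobTest : public testing::Test {
 protected:
  // The mock environment lives directly in the fixture; tests pass
  // &mock_env_ wherever an Env* is expected (e.g. Options::env).
  ExampleBlobTest()
      : mock_env_(Env::Default()),
        fs_(mock_env_.GetFileSystem().get()),
        clock_(mock_env_.GetSystemClock().get()) {}

  MockEnv mock_env_;
  FileSystem* fs_;      // borrowed from mock_env_
  SystemClock* clock_;  // borrowed from mock_env_
};

}  // namespace ROCKSDB_NAMESPACE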


@ -84,18 +84,17 @@ void WriteBlobFile(uint32_t column_family_id,
class BlobFileCacheTest : public testing::Test {
protected:
BlobFileCacheTest() { mock_env_.reset(MockEnv::Create(Env::Default())); }
BlobFileCacheTest() : mock_env_(Env::Default()) {}
std::unique_ptr<Env> mock_env_;
MockEnv mock_env_;
};
TEST_F(BlobFileCacheTest, GetBlobFileReader) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.statistics = CreateDBStatistics();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileCacheTest_GetBlobFileReader"),
test::PerThreadDBPath(&mock_env_, "BlobFileCacheTest_GetBlobFileReader"),
0);
options.enable_blob_files = true;
@ -136,10 +135,10 @@ TEST_F(BlobFileCacheTest, GetBlobFileReader) {
TEST_F(BlobFileCacheTest, GetBlobFileReader_Race) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.statistics = CreateDBStatistics();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileCacheTest_GetBlobFileReader_Race"),
0);
options.enable_blob_files = true;
@ -188,10 +187,10 @@ TEST_F(BlobFileCacheTest, GetBlobFileReader_Race) {
TEST_F(BlobFileCacheTest, GetBlobFileReader_IOError) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.statistics = CreateDBStatistics();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileCacheTest_GetBlobFileReader_IOError"),
0);
options.enable_blob_files = true;
@ -222,10 +221,10 @@ TEST_F(BlobFileCacheTest, GetBlobFileReader_IOError) {
TEST_F(BlobFileCacheTest, GetBlobFileReader_CacheFull) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.statistics = CreateDBStatistics();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileCacheTest_GetBlobFileReader_CacheFull"),
0);
options.enable_blob_files = true;
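The cache tests above follow the same recurring setup: wire the by-value environment into Options along with a per-test path. Roughly, building on the fixture sketched earlier (the test name is made up; this additionally needs rocksdb/options.h, rocksdb/statistics.h, and test_util/testutil.h):

TEST_F(ExampleBlobTest, WiresMockEnvIntoOptions) {
  Options options;
  options.env = &mock_env_;  // the fixture's by-value MockEnv
  options.statistics = CreateDBStatistics();
  options.cf_paths.emplace_back(
      test::PerThreadDBPath(&mock_env_, "ExampleBlobTest_WiresMockEnv"), 0);
  options.enable_blob_files = true;
}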


@ -9,7 +9,6 @@
#pragma once
#include "db/error_handler.h"
#include "db/event_helpers.h"
#include "file/sst_file_manager_impl.h"
#include "rocksdb/status.h"
@ -17,51 +16,23 @@ namespace ROCKSDB_NAMESPACE {
class BlobFileCompletionCallback {
public:
BlobFileCompletionCallback(
SstFileManager* sst_file_manager, InstrumentedMutex* mutex,
ErrorHandler* error_handler, EventLogger* event_logger,
const std::vector<std::shared_ptr<EventListener>>& listeners,
const std::string& dbname)
: event_logger_(event_logger), listeners_(listeners), dbname_(dbname) {
#ifndef ROCKSDB_LITE
sst_file_manager_ = sst_file_manager;
mutex_ = mutex;
error_handler_ = error_handler;
#else
(void)sst_file_manager;
(void)mutex;
(void)error_handler;
#endif // ROCKSDB_LITE
#ifdef ROCKSDB_LITE
BlobFileCompletionCallback(SstFileManager* /*sst_file_manager*/,
InstrumentedMutex* /*mutex*/,
ErrorHandler* /*error_handler*/) {}
Status OnBlobFileCompleted(const std::string& /*file_name*/) {
return Status::OK();
}
void OnBlobFileCreationStarted(const std::string& file_name,
const std::string& column_family_name,
int job_id,
BlobFileCreationReason creation_reason) {
#ifndef ROCKSDB_LITE
// Notify the listeners.
EventHelpers::NotifyBlobFileCreationStarted(listeners_, dbname_,
column_family_name, file_name,
job_id, creation_reason);
#else
(void)file_name;
(void)column_family_name;
(void)job_id;
(void)creation_reason;
#endif
}
BlobFileCompletionCallback(SstFileManager* sst_file_manager,
InstrumentedMutex* mutex,
ErrorHandler* error_handler)
: sst_file_manager_(sst_file_manager),
mutex_(mutex),
error_handler_(error_handler) {}
Status OnBlobFileCompleted(const std::string& file_name,
const std::string& column_family_name, int job_id,
uint64_t file_number,
BlobFileCreationReason creation_reason,
const Status& report_status,
const std::string& checksum_value,
const std::string& checksum_method,
uint64_t blob_count, uint64_t blob_bytes) {
Status OnBlobFileCompleted(const std::string& file_name) {
Status s;
#ifndef ROCKSDB_LITE
auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager_);
if (sfm) {
// Report new blob files to SstFileManagerImpl
@ -74,28 +45,13 @@ class BlobFileCompletionCallback {
error_handler_->SetBGError(s, BackgroundErrorReason::kFlush);
}
}
#endif // !ROCKSDB_LITE
// Notify the listeners.
EventHelpers::LogAndNotifyBlobFileCreationFinished(
event_logger_, listeners_, dbname_, column_family_name, file_name,
job_id, file_number, creation_reason,
(!report_status.ok() ? report_status : s),
(checksum_value.empty() ? kUnknownFileChecksum : checksum_value),
(checksum_method.empty() ? kUnknownFileChecksumFuncName
: checksum_method),
blob_count, blob_bytes);
return s;
}
private:
#ifndef ROCKSDB_LITE
SstFileManager* sst_file_manager_;
InstrumentedMutex* mutex_;
ErrorHandler* error_handler_;
#endif // ROCKSDB_LITE
EventLogger* event_logger_;
std::vector<std::shared_ptr<EventListener>> listeners_;
std::string dbname_;
};
} // namespace ROCKSDB_NAMESPACE
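In the non-LITE branch above, completion boils down to reporting the finished file to SstFileManagerImpl, folding any tracking failure into the background-error machinery, and returning the status. A caller-side sketch — ReportCompletedBlobFile is an invented helper, not a function in the tree:

#include <string>

#include "db/blob/blob_file_completion_callback.h"

namespace ROCKSDB_NAMESPACE {

// Invented helper: forward a freshly written blob file to the (optional)
// completion callback; on this branch the callback needs only the file name.
Status ReportCompletedBlobFile(BlobFileCompletionCallback* callback,
                               const std::string& blob_file_path) {
  if (callback == nullptr) {
    return Status::OK();  // many tests pass no callback at all
  }
  return callback->OnBlobFileCompleted(blob_file_path);
}

}  // namespace ROCKSDB_NAMESPACE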


@ -9,7 +9,6 @@
#include <string>
#include "db/blob/blob_log_format.h"
#include "file/file_prefetch_buffer.h"
#include "file/filename.h"
#include "monitoring/statistics.h"
#include "options/cf_options.h"
@ -148,10 +147,9 @@ Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
constexpr uint64_t read_offset = 0;
constexpr size_t read_size = BlobLogHeader::kSize;
// TODO: rate limit reading headers from blob files.
const Status s = ReadFromFile(file_reader, read_offset, read_size,
statistics, &header_slice, &buf, &aligned_buf,
Env::IO_TOTAL /* rate_limiter_priority */);
const Status s =
ReadFromFile(file_reader, read_offset, read_size, statistics,
&header_slice, &buf, &aligned_buf);
if (!s.ok()) {
return s;
}
@ -199,10 +197,9 @@ Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader,
const uint64_t read_offset = file_size - BlobLogFooter::kSize;
constexpr size_t read_size = BlobLogFooter::kSize;
// TODO: rate limit reading footers from blob files.
const Status s = ReadFromFile(file_reader, read_offset, read_size,
statistics, &footer_slice, &buf, &aligned_buf,
Env::IO_TOTAL /* rate_limiter_priority */);
const Status s =
ReadFromFile(file_reader, read_offset, read_size, statistics,
&footer_slice, &buf, &aligned_buf);
if (!s.ok()) {
return s;
}
@ -232,8 +229,7 @@ Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader,
Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
uint64_t read_offset, size_t read_size,
Statistics* statistics, Slice* slice,
Buffer* buf, AlignedBuf* aligned_buf,
Env::IOPriority rate_limiter_priority) {
Buffer* buf, AlignedBuf* aligned_buf) {
assert(slice);
assert(buf);
assert(aligned_buf);
@ -248,13 +244,13 @@ Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
constexpr char* scratch = nullptr;
s = file_reader->Read(IOOptions(), read_offset, read_size, slice, scratch,
aligned_buf, rate_limiter_priority);
aligned_buf);
} else {
buf->reset(new char[read_size]);
constexpr AlignedBuf* aligned_scratch = nullptr;
s = file_reader->Read(IOOptions(), read_offset, read_size, slice,
buf->get(), aligned_scratch, rate_limiter_priority);
buf->get(), aligned_scratch);
}
if (!s.ok()) {
@ -286,7 +282,6 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
const Slice& user_key, uint64_t offset,
uint64_t value_size,
CompressionType compression_type,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* value,
uint64_t* bytes_read) const {
assert(value);
@ -318,35 +313,19 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
Buffer buf;
AlignedBuf aligned_buf;
bool prefetched = false;
if (prefetch_buffer) {
Status s;
constexpr bool for_compaction = true;
prefetched = prefetch_buffer->TryReadFromCache(
IOOptions(), file_reader_.get(), record_offset,
static_cast<size_t>(record_size), &record_slice, &s,
read_options.rate_limiter_priority, for_compaction);
if (!s.ok()) {
return s;
}
}
if (!prefetched) {
{
TEST_SYNC_POINT("BlobFileReader::GetBlob:ReadFromFile");
const Status s = ReadFromFile(file_reader_.get(), record_offset,
static_cast<size_t>(record_size), statistics_,
&record_slice, &buf, &aligned_buf,
read_options.rate_limiter_priority);
&record_slice, &buf, &aligned_buf);
if (!s.ok()) {
return s;
}
}
TEST_SYNC_POINT_CALLBACK("BlobFileReader::GetBlob:TamperWithResult",
&record_slice);
}
if (read_options.verify_checksums) {
const Status s = VerifyBlob(record_slice, user_key, value_size);
@ -372,125 +351,6 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
return Status::OK();
}
void BlobFileReader::MultiGetBlob(
const ReadOptions& read_options,
const autovector<std::reference_wrapper<const Slice>>& user_keys,
const autovector<uint64_t>& offsets,
const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
autovector<PinnableSlice*>& values, uint64_t* bytes_read) const {
const size_t num_blobs = user_keys.size();
assert(num_blobs > 0);
assert(num_blobs == offsets.size());
assert(num_blobs == value_sizes.size());
assert(num_blobs == statuses.size());
assert(num_blobs == values.size());
#ifndef NDEBUG
for (size_t i = 0; i < offsets.size() - 1; ++i) {
assert(offsets[i] <= offsets[i + 1]);
}
#endif // !NDEBUG
std::vector<FSReadRequest> read_reqs(num_blobs);
autovector<uint64_t> adjustments;
uint64_t total_len = 0;
for (size_t i = 0; i < num_blobs; ++i) {
const size_t key_size = user_keys[i].get().size();
assert(IsValidBlobOffset(offsets[i], key_size, value_sizes[i], file_size_));
const uint64_t adjustment =
read_options.verify_checksums
? BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size)
: 0;
assert(offsets[i] >= adjustment);
adjustments.push_back(adjustment);
read_reqs[i].offset = offsets[i] - adjustment;
read_reqs[i].len = value_sizes[i] + adjustment;
total_len += read_reqs[i].len;
}
RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, total_len);
Buffer buf;
AlignedBuf aligned_buf;
Status s;
bool direct_io = file_reader_->use_direct_io();
if (direct_io) {
for (size_t i = 0; i < read_reqs.size(); ++i) {
read_reqs[i].scratch = nullptr;
}
} else {
buf.reset(new char[total_len]);
std::ptrdiff_t pos = 0;
for (size_t i = 0; i < read_reqs.size(); ++i) {
read_reqs[i].scratch = buf.get() + pos;
pos += read_reqs[i].len;
}
}
TEST_SYNC_POINT("BlobFileReader::MultiGetBlob:ReadFromFile");
s = file_reader_->MultiRead(IOOptions(), read_reqs.data(), read_reqs.size(),
direct_io ? &aligned_buf : nullptr,
read_options.rate_limiter_priority);
if (!s.ok()) {
for (auto& req : read_reqs) {
req.status.PermitUncheckedError();
}
for (size_t i = 0; i < num_blobs; ++i) {
assert(statuses[i]);
*statuses[i] = s;
}
return;
}
assert(s.ok());
for (size_t i = 0; i < num_blobs; ++i) {
auto& req = read_reqs[i];
assert(statuses[i]);
if (req.status.ok() && req.result.size() != req.len) {
req.status = IOStatus::Corruption("Failed to read data from blob file");
}
*statuses[i] = req.status;
}
if (read_options.verify_checksums) {
for (size_t i = 0; i < num_blobs; ++i) {
assert(statuses[i]);
if (!statuses[i]->ok()) {
continue;
}
const Slice& record_slice = read_reqs[i].result;
s = VerifyBlob(record_slice, user_keys[i], value_sizes[i]);
if (!s.ok()) {
assert(statuses[i]);
*statuses[i] = s;
}
}
}
for (size_t i = 0; i < num_blobs; ++i) {
assert(statuses[i]);
if (!statuses[i]->ok()) {
continue;
}
const Slice& record_slice = read_reqs[i].result;
const Slice value_slice(record_slice.data() + adjustments[i],
value_sizes[i]);
s = UncompressBlobIfNeeded(value_slice, compression_type_, clock_,
statistics_, values[i]);
if (!s.ok()) {
*statuses[i] = s;
}
}
if (bytes_read) {
uint64_t total_bytes = 0;
for (const auto& req : read_reqs) {
total_bytes += req.result.size();
}
*bytes_read = total_bytes;
}
}
Status BlobFileReader::VerifyBlob(const Slice& record_slice,
const Slice& user_key, uint64_t value_size) {
BlobLogRecord record;
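After this change a point lookup through BlobFileReader takes neither a FilePrefetchBuffer nor a rate-limiter priority. A sketch of the call shape, assuming a reader obtained via BlobFileReader::Create() as in the tests; ReadOneBlob is an invented wrapper:

#include "db/blob/blob_file_reader.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"

namespace ROCKSDB_NAMESPACE {

// Invented wrapper around the 6.24-era GetBlob() signature.
Status ReadOneBlob(const BlobFileReader& reader, const Slice& user_key,
                   uint64_t blob_offset, uint64_t blob_size,
                   PinnableSlice* value) {
  ReadOptions read_options;
  read_options.verify_checksums = true;  // also re-verifies the stored key

  uint64_t bytes_read = 0;
  return reader.GetBlob(read_options, user_key, blob_offset, blob_size,
                        kNoCompression, value, &bytes_read);
}

}  // namespace ROCKSDB_NAMESPACE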


@ -11,7 +11,6 @@
#include "file/random_access_file_reader.h"
#include "rocksdb/compression_type.h"
#include "rocksdb/rocksdb_namespace.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
@ -21,7 +20,6 @@ struct FileOptions;
class HistogramImpl;
struct ReadOptions;
class Slice;
class FilePrefetchBuffer;
class PinnableSlice;
class Statistics;
@ -42,22 +40,9 @@ class BlobFileReader {
Status GetBlob(const ReadOptions& read_options, const Slice& user_key,
uint64_t offset, uint64_t value_size,
CompressionType compression_type,
FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value,
CompressionType compression_type, PinnableSlice* value,
uint64_t* bytes_read) const;
// offsets must be sorted in ascending order by caller.
void MultiGetBlob(
const ReadOptions& read_options,
const autovector<std::reference_wrapper<const Slice>>& user_keys,
const autovector<uint64_t>& offsets,
const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
autovector<PinnableSlice*>& values, uint64_t* bytes_read) const;
CompressionType GetCompressionType() const { return compression_type_; }
uint64_t GetFileSize() const { return file_size_; }
private:
BlobFileReader(std::unique_ptr<RandomAccessFileReader>&& file_reader,
uint64_t file_size, CompressionType compression_type,
@ -83,8 +68,7 @@ class BlobFileReader {
static Status ReadFromFile(const RandomAccessFileReader* file_reader,
uint64_t read_offset, size_t read_size,
Statistics* statistics, Slice* slice, Buffer* buf,
AlignedBuf* aligned_buf,
Env::IOPriority rate_limiter_priority);
AlignedBuf* aligned_buf);
static Status VerifyBlob(const Slice& record_slice, const Slice& user_key,
uint64_t value_size);
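With the MultiGetBlob() declaration gone from this header, batched lookups against a single blob file reduce to a per-record loop over GetBlob(). A sketch only — the container types mirror the removed declaration, and error/value plumbing is elided:

// Offsets no longer need to be pre-sorted: each record is read on its own.
for (size_t i = 0; i < user_keys.size(); ++i) {
  uint64_t bytes_read = 0;
  *statuses[i] =
      reader->GetBlob(read_options, user_keys[i], offsets[i], value_sizes[i],
                      reader->GetCompressionType(), values[i], &bytes_read);
}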


@ -27,23 +27,23 @@ namespace ROCKSDB_NAMESPACE {
namespace {
// Creates a test blob file with `num` blobs in it.
// Creates a test blob file with a single blob in it. Note: this method
// makes it possible to test various corner cases by allowing the caller
// to specify the contents of various blob file header/footer fields.
void WriteBlobFile(const ImmutableOptions& immutable_options,
uint32_t column_family_id, bool has_ttl,
const ExpirationRange& expiration_range_header,
const ExpirationRange& expiration_range_footer,
uint64_t blob_file_number, const std::vector<Slice>& keys,
const std::vector<Slice>& blobs, CompressionType compression,
std::vector<uint64_t>& blob_offsets,
std::vector<uint64_t>& blob_sizes) {
uint64_t blob_file_number, const Slice& key,
const Slice& blob, CompressionType compression_type,
uint64_t* blob_offset, uint64_t* blob_size) {
assert(!immutable_options.cf_paths.empty());
size_t num = keys.size();
assert(num == blobs.size());
assert(num == blob_offsets.size());
assert(num == blob_sizes.size());
assert(blob_offset);
assert(blob_size);
const std::string blob_file_path =
BlobFileName(immutable_options.cf_paths.front().path, blob_file_number);
std::unique_ptr<FSWritableFile> file;
ASSERT_OK(NewWritableFile(immutable_options.fs.get(), blob_file_path, &file,
FileOptions()));
@ -59,90 +59,64 @@ void WriteBlobFile(const ImmutableOptions& immutable_options,
statistics, blob_file_number, use_fsync,
do_flush);
BlobLogHeader header(column_family_id, compression, has_ttl,
BlobLogHeader header(column_family_id, compression_type, has_ttl,
expiration_range_header);
ASSERT_OK(blob_log_writer.WriteHeader(header));
std::vector<std::string> compressed_blobs(num);
std::vector<Slice> blobs_to_write(num);
if (kNoCompression == compression) {
for (size_t i = 0; i < num; ++i) {
blobs_to_write[i] = blobs[i];
blob_sizes[i] = blobs[i].size();
}
std::string compressed_blob;
Slice blob_to_write;
if (compression_type == kNoCompression) {
blob_to_write = blob;
*blob_size = blob.size();
} else {
CompressionOptions opts;
CompressionContext context(compression);
CompressionContext context(compression_type);
constexpr uint64_t sample_for_compression = 0;
CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(),
compression, sample_for_compression);
compression_type, sample_for_compression);
constexpr uint32_t compression_format_version = 2;
for (size_t i = 0; i < num; ++i) {
ASSERT_TRUE(CompressData(blobs[i], info, compression_format_version,
&compressed_blobs[i]));
blobs_to_write[i] = compressed_blobs[i];
blob_sizes[i] = compressed_blobs[i].size();
}
ASSERT_TRUE(
CompressData(blob, info, compression_format_version, &compressed_blob));
blob_to_write = compressed_blob;
*blob_size = compressed_blob.size();
}
for (size_t i = 0; i < num; ++i) {
uint64_t key_offset = 0;
ASSERT_OK(blob_log_writer.AddRecord(keys[i], blobs_to_write[i], &key_offset,
&blob_offsets[i]));
}
ASSERT_OK(
blob_log_writer.AddRecord(key, blob_to_write, &key_offset, blob_offset));
BlobLogFooter footer;
footer.blob_count = num;
footer.blob_count = 1;
footer.expiration_range = expiration_range_footer;
std::string checksum_method;
std::string checksum_value;
ASSERT_OK(
blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value));
}
// Creates a test blob file with a single blob in it. Note: this method
// makes it possible to test various corner cases by allowing the caller
// to specify the contents of various blob file header/footer fields.
void WriteBlobFile(const ImmutableOptions& immutable_options,
uint32_t column_family_id, bool has_ttl,
const ExpirationRange& expiration_range_header,
const ExpirationRange& expiration_range_footer,
uint64_t blob_file_number, const Slice& key,
const Slice& blob, CompressionType compression,
uint64_t* blob_offset, uint64_t* blob_size) {
std::vector<Slice> keys{key};
std::vector<Slice> blobs{blob};
std::vector<uint64_t> blob_offsets{0};
std::vector<uint64_t> blob_sizes{0};
WriteBlobFile(immutable_options, column_family_id, has_ttl,
expiration_range_header, expiration_range_footer,
blob_file_number, keys, blobs, compression, blob_offsets,
blob_sizes);
if (blob_offset) {
*blob_offset = blob_offsets[0];
}
if (blob_size) {
*blob_size = blob_sizes[0];
}
}
} // anonymous namespace
class BlobFileReaderTest : public testing::Test {
protected:
BlobFileReaderTest() { mock_env_.reset(MockEnv::Create(Env::Default())); }
std::unique_ptr<Env> mock_env_;
BlobFileReaderTest() : mock_env_(Env::Default()) {}
MockEnv mock_env_;
};
TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileReaderTest_CreateReaderAndGetBlob"),
0);
options.enable_blob_files = true;
@ -153,19 +127,15 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
constexpr uint64_t blob_file_number = 1;
constexpr size_t num_blobs = 3;
const std::vector<std::string> key_strs = {"key1", "key2", "key3"};
const std::vector<std::string> blob_strs = {"blob1", "blob2", "blob3"};
constexpr char key[] = "key";
constexpr char blob[] = "blob";
const std::vector<Slice> keys = {key_strs[0], key_strs[1], key_strs[2]};
const std::vector<Slice> blobs = {blob_strs[0], blob_strs[1], blob_strs[2]};
std::vector<uint64_t> blob_offsets(keys.size());
std::vector<uint64_t> blob_sizes(keys.size());
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range,
expiration_range, blob_file_number, keys, blobs, kNoCompression,
blob_offsets, blob_sizes);
expiration_range, blob_file_number, key, blob, kNoCompression,
&blob_offset, &blob_size);
constexpr HistogramImpl* blob_file_read_hist = nullptr;
@ -179,42 +149,14 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
ReadOptions read_options;
read_options.verify_checksums = false;
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_OK(reader->GetBlob(read_options, keys[0], blob_offsets[0],
blob_sizes[0], kNoCompression, prefetch_buffer,
&value, &bytes_read));
ASSERT_EQ(value, blobs[0]);
ASSERT_EQ(bytes_read, blob_sizes[0]);
// MultiGetBlob
bytes_read = 0;
size_t total_size = 0;
autovector<std::reference_wrapper<const Slice>> key_refs;
for (const auto& key_ref : keys) {
key_refs.emplace_back(std::cref(key_ref));
}
autovector<uint64_t> offsets{blob_offsets[0], blob_offsets[1],
blob_offsets[2]};
autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]};
std::array<Status, num_blobs> statuses_buf;
autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
&statuses_buf[2]};
std::array<PinnableSlice, num_blobs> value_buf;
autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
&value_buf[2]};
reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
values, &bytes_read);
for (size_t i = 0; i < num_blobs; ++i) {
ASSERT_OK(statuses_buf[i]);
ASSERT_EQ(value_buf[i], blobs[i]);
total_size += blob_sizes[i];
}
ASSERT_EQ(bytes_read, total_size);
ASSERT_OK(reader->GetBlob(read_options, key, blob_offset, blob_size,
kNoCompression, &value, &bytes_read));
ASSERT_EQ(value, blob);
ASSERT_EQ(bytes_read, blob_size);
}
read_options.verify_checksums = true;
@ -223,15 +165,14 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_OK(reader->GetBlob(read_options, keys[1], blob_offsets[1],
blob_sizes[1], kNoCompression, prefetch_buffer,
&value, &bytes_read));
ASSERT_EQ(value, blobs[1]);
ASSERT_OK(reader->GetBlob(read_options, key, blob_offset, blob_size,
kNoCompression, &value, &bytes_read));
ASSERT_EQ(value, blob);
const uint64_t key_size = keys[1].size();
constexpr uint64_t key_size = sizeof(key) - 1;
ASSERT_EQ(bytes_read,
BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) +
blob_sizes[1]);
blob_size);
}
// Invalid offset (too close to start of file)
@ -240,9 +181,8 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, keys[0], blob_offsets[0] - 1,
blob_sizes[0], kNoCompression, prefetch_buffer,
&value, &bytes_read)
->GetBlob(read_options, key, blob_offset - 1, blob_size,
kNoCompression, &value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
}
@ -253,9 +193,8 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, keys[2], blob_offsets[2] + 1,
blob_sizes[2], kNoCompression, prefetch_buffer,
&value, &bytes_read)
->GetBlob(read_options, key, blob_offset + 1, blob_size,
kNoCompression, &value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
}
@ -266,9 +205,8 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, keys[0], blob_offsets[0],
blob_sizes[0], kZSTD, prefetch_buffer, &value,
&bytes_read)
->GetBlob(read_options, key, blob_offset, blob_size, kZSTD,
&value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
}
@ -281,82 +219,23 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
ASSERT_TRUE(reader
->GetBlob(read_options, shorter_key,
blob_offsets[0] -
(keys[0].size() - sizeof(shorter_key) + 1),
blob_sizes[0], kNoCompression, prefetch_buffer,
&value, &bytes_read)
blob_offset - (sizeof(key) - sizeof(shorter_key)),
blob_size, kNoCompression, &value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
// MultiGetBlob
autovector<std::reference_wrapper<const Slice>> key_refs;
for (const auto& key_ref : keys) {
key_refs.emplace_back(std::cref(key_ref));
}
Slice shorter_key_slice(shorter_key, sizeof(shorter_key) - 1);
key_refs[1] = std::cref(shorter_key_slice);
autovector<uint64_t> offsets{
blob_offsets[0],
blob_offsets[1] - (keys[1].size() - key_refs[1].get().size()),
blob_offsets[2]};
autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]};
std::array<Status, num_blobs> statuses_buf;
autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
&statuses_buf[2]};
std::array<PinnableSlice, num_blobs> value_buf;
autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
&value_buf[2]};
reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
values, &bytes_read);
for (size_t i = 0; i < num_blobs; ++i) {
if (i == 1) {
ASSERT_TRUE(statuses_buf[i].IsCorruption());
} else {
ASSERT_OK(statuses_buf[i]);
}
}
}
// Incorrect key
{
constexpr char incorrect_key[] = "foo1";
constexpr char incorrect_key[] = "foo";
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, incorrect_key, blob_offsets[0],
blob_sizes[0], kNoCompression, prefetch_buffer,
&value, &bytes_read)
->GetBlob(read_options, incorrect_key, blob_offset,
blob_size, kNoCompression, &value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
// MultiGetBlob
autovector<std::reference_wrapper<const Slice>> key_refs;
for (const auto& key_ref : keys) {
key_refs.emplace_back(std::cref(key_ref));
}
Slice wrong_key_slice(incorrect_key, sizeof(incorrect_key) - 1);
key_refs[2] = std::cref(wrong_key_slice);
autovector<uint64_t> offsets{blob_offsets[0], blob_offsets[1],
blob_offsets[2]};
autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]};
std::array<Status, num_blobs> statuses_buf;
autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
&statuses_buf[2]};
std::array<PinnableSlice, num_blobs> value_buf;
autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
&value_buf[2]};
reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
values, &bytes_read);
for (size_t i = 0; i < num_blobs; ++i) {
if (i == num_blobs - 1) {
ASSERT_TRUE(statuses_buf[i].IsCorruption());
} else {
ASSERT_OK(statuses_buf[i]);
}
}
}
// Incorrect value size
@ -365,35 +244,10 @@ TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, keys[1], blob_offsets[1],
blob_sizes[1] + 1, kNoCompression,
prefetch_buffer, &value, &bytes_read)
->GetBlob(read_options, key, blob_offset, blob_size + 1,
kNoCompression, &value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
// MultiGetBlob
autovector<std::reference_wrapper<const Slice>> key_refs;
for (const auto& key_ref : keys) {
key_refs.emplace_back(std::cref(key_ref));
}
autovector<uint64_t> offsets{blob_offsets[0], blob_offsets[1],
blob_offsets[2]};
autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1] + 1, blob_sizes[2]};
std::array<Status, num_blobs> statuses_buf;
autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
&statuses_buf[2]};
std::array<PinnableSlice, num_blobs> value_buf;
autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
&value_buf[2]};
reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
values, &bytes_read);
for (size_t i = 0; i < num_blobs; ++i) {
if (i != 1) {
ASSERT_OK(statuses_buf[i]);
} else {
ASSERT_TRUE(statuses_buf[i].IsCorruption());
}
}
}
}
@ -402,10 +256,9 @@ TEST_F(BlobFileReaderTest, Malformed) {
// detect the error when we open it for reading
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_Malformed"),
0);
test::PerThreadDBPath(&mock_env_, "BlobFileReaderTest_Malformed"), 0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
@ -455,9 +308,9 @@ TEST_F(BlobFileReaderTest, Malformed) {
TEST_F(BlobFileReaderTest, TTL) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_TTL"), 0);
test::PerThreadDBPath(&mock_env_, "BlobFileReaderTest_TTL"), 0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
@ -489,9 +342,9 @@ TEST_F(BlobFileReaderTest, TTL) {
TEST_F(BlobFileReaderTest, ExpirationRangeInHeader) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileReaderTest_ExpirationRangeInHeader"),
0);
options.enable_blob_files = true;
@ -528,9 +381,9 @@ TEST_F(BlobFileReaderTest, ExpirationRangeInHeader) {
TEST_F(BlobFileReaderTest, ExpirationRangeInFooter) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileReaderTest_ExpirationRangeInFooter"),
0);
options.enable_blob_files = true;
@ -567,9 +420,9 @@ TEST_F(BlobFileReaderTest, ExpirationRangeInFooter) {
TEST_F(BlobFileReaderTest, IncorrectColumnFamily) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileReaderTest_IncorrectColumnFamily"),
0);
options.enable_blob_files = true;
@ -605,10 +458,9 @@ TEST_F(BlobFileReaderTest, IncorrectColumnFamily) {
TEST_F(BlobFileReaderTest, BlobCRCError) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_BlobCRCError"),
0);
test::PerThreadDBPath(&mock_env_, "BlobFileReaderTest_BlobCRCError"), 0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
@ -645,14 +497,12 @@ TEST_F(BlobFileReaderTest, BlobCRCError) {
SyncPoint::GetInstance()->EnableProcessing();
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(ReadOptions(), key, blob_offset, blob_size,
kNoCompression, prefetch_buffer, &value,
&bytes_read)
kNoCompression, &value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
@ -666,10 +516,9 @@ TEST_F(BlobFileReaderTest, Compression) {
}
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_Compression"),
0);
test::PerThreadDBPath(&mock_env_, "BlobFileReaderTest_Compression"), 0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
@ -700,15 +549,12 @@ TEST_F(BlobFileReaderTest, Compression) {
ReadOptions read_options;
read_options.verify_checksums = false;
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_OK(reader->GetBlob(read_options, key, blob_offset, blob_size,
kSnappyCompression, prefetch_buffer, &value,
&bytes_read));
kSnappyCompression, &value, &bytes_read));
ASSERT_EQ(value, blob);
ASSERT_EQ(bytes_read, blob_size);
}
@ -720,8 +566,7 @@ TEST_F(BlobFileReaderTest, Compression) {
uint64_t bytes_read = 0;
ASSERT_OK(reader->GetBlob(read_options, key, blob_offset, blob_size,
kSnappyCompression, prefetch_buffer, &value,
&bytes_read));
kSnappyCompression, &value, &bytes_read));
ASSERT_EQ(value, blob);
constexpr uint64_t key_size = sizeof(key) - 1;
@ -737,9 +582,9 @@ TEST_F(BlobFileReaderTest, UncompressionError) {
}
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileReaderTest_UncompressionError"),
0);
options.enable_blob_files = true;
@ -779,14 +624,12 @@ TEST_F(BlobFileReaderTest, UncompressionError) {
SyncPoint::GetInstance()->EnableProcessing();
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(ReadOptions(), key, blob_offset, blob_size,
kSnappyCompression, prefetch_buffer, &value,
&bytes_read)
kSnappyCompression, &value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
@ -798,13 +641,13 @@ class BlobFileReaderIOErrorTest
: public testing::Test,
public testing::WithParamInterface<std::string> {
protected:
BlobFileReaderIOErrorTest() : sync_point_(GetParam()) {
mock_env_.reset(MockEnv::Create(Env::Default()));
fault_injection_env_.reset(new FaultInjectionTestEnv(mock_env_.get()));
}
BlobFileReaderIOErrorTest()
: mock_env_(Env::Default()),
fault_injection_env_(&mock_env_),
sync_point_(GetParam()) {}
std::unique_ptr<Env> mock_env_;
std::unique_ptr<FaultInjectionTestEnv> fault_injection_env_;
MockEnv mock_env_;
FaultInjectionTestEnv fault_injection_env_;
std::string sync_point_;
};
@ -820,9 +663,9 @@ TEST_P(BlobFileReaderIOErrorTest, IOError) {
// Simulates an I/O error during the specified step
Options options;
options.env = fault_injection_env_.get();
options.env = &fault_injection_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(fault_injection_env_.get(),
test::PerThreadDBPath(&fault_injection_env_,
"BlobFileReaderIOErrorTest_IOError"),
0);
options.enable_blob_files = true;
@ -844,7 +687,7 @@ TEST_P(BlobFileReaderIOErrorTest, IOError) {
&blob_offset, &blob_size);
SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
fault_injection_env_->SetFilesystemActive(false,
fault_injection_env_.SetFilesystemActive(false,
Status::IOError(sync_point_));
});
SyncPoint::GetInstance()->EnableProcessing();
@ -865,14 +708,12 @@ TEST_P(BlobFileReaderIOErrorTest, IOError) {
} else {
ASSERT_OK(s);
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(ReadOptions(), key, blob_offset, blob_size,
kNoCompression, prefetch_buffer, &value,
&bytes_read)
kNoCompression, &value, &bytes_read)
.IsIOError());
ASSERT_EQ(bytes_read, 0);
}
@ -885,11 +726,10 @@ class BlobFileReaderDecodingErrorTest
: public testing::Test,
public testing::WithParamInterface<std::string> {
protected:
BlobFileReaderDecodingErrorTest() : sync_point_(GetParam()) {
mock_env_.reset(MockEnv::Create(Env::Default()));
}
BlobFileReaderDecodingErrorTest()
: mock_env_(Env::Default()), sync_point_(GetParam()) {}
std::unique_ptr<Env> mock_env_;
MockEnv mock_env_;
std::string sync_point_;
};
@ -901,9 +741,9 @@ INSTANTIATE_TEST_CASE_P(BlobFileReaderTest, BlobFileReaderDecodingErrorTest,
TEST_P(BlobFileReaderDecodingErrorTest, DecodingError) {
Options options;
options.env = mock_env_.get();
options.env = &mock_env_;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
test::PerThreadDBPath(&mock_env_,
"BlobFileReaderDecodingErrorTest_DecodingError"),
0);
options.enable_blob_files = true;
@ -950,14 +790,12 @@ TEST_P(BlobFileReaderDecodingErrorTest, DecodingError) {
} else {
ASSERT_OK(s);
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(ReadOptions(), key, blob_offset, blob_size,
kNoCompression, prefetch_buffer, &value,
&bytes_read)
kNoCompression, &value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
}
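For reference, the reverted single-blob WriteBlobFile() helper above is called like this (a sketch; immutable_options and the literal arguments stand in for the surrounding test setup):

uint64_t blob_offset = 0;
uint64_t blob_size = 0;

WriteBlobFile(immutable_options, /*column_family_id=*/1, /*has_ttl=*/false,
              ExpirationRange(), ExpirationRange(), /*blob_file_number=*/1,
              "key", "blob", kNoCompression, &blob_offset, &blob_size);

// blob_offset/blob_size now locate the single record and feed directly into
// the BlobFileReader::GetBlob() assertions in the tests above.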


@ -52,9 +52,6 @@ class BlobIndex {
BlobIndex() : type_(Type::kUnknown) {}
BlobIndex(const BlobIndex&) = default;
BlobIndex& operator=(const BlobIndex&) = default;
bool IsInlined() const { return type_ == Type::kInlinedTTL; }
bool HasTTL() const {
@ -96,9 +93,9 @@ class BlobIndex {
assert(slice.size() > 0);
type_ = static_cast<Type>(*slice.data());
if (type_ >= Type::kUnknown) {
return Status::Corruption(kErrorMessage,
"Unknown blob index type: " +
std::to_string(static_cast<char>(type_)));
return Status::Corruption(
kErrorMessage,
"Unknown blob index type: " + ToString(static_cast<char>(type_)));
}
slice = Slice(slice.data() + 1, slice.size() - 1);
if (HasTTL()) {
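Note the switch from std::to_string back to RocksDB's own ToString() here; on this branch the wrapper from util/string_util.h remains the house style for numeric-to-string conversion (it falls back to std::ostringstream on toolchains without std::to_string). An illustrative sketch of the resulting status construction — the helper name and message text are not from the tree:

#include "rocksdb/status.h"
#include "util/string_util.h"

namespace ROCKSDB_NAMESPACE {

// Illustrative only: build the corruption status returned for an
// unrecognized blob index type byte, using ToString().
Status MakeUnknownTypeError(char raw_type) {
  return Status::Corruption("Error while decoding blob index",
                            "Unknown blob index type: " + ToString(raw_type));
}

}  // namespace ROCKSDB_NAMESPACE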


@ -28,10 +28,8 @@ Status BlobLogSequentialReader::ReadSlice(uint64_t size, Slice* slice,
assert(file_);
StopWatch read_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
// TODO: rate limit `BlobLogSequentialReader` reads (it appears unused?)
Status s =
file_->Read(IOOptions(), next_byte_, static_cast<size_t>(size), slice,
buf, nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
Status s = file_->Read(IOOptions(), next_byte_, static_cast<size_t>(size),
slice, buf, nullptr);
next_byte_ += size;
if (!s.ok()) {
return s;


@ -4,10 +4,8 @@
// (found in the LICENSE.Apache file in the root directory).
#include <array>
#include <sstream>
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "test_util/sync_point.h"
@ -127,237 +125,6 @@ TEST_F(DBBlobBasicTest, MultiGetBlobs) {
}
}
#ifndef ROCKSDB_LITE
TEST_F(DBBlobBasicTest, MultiGetWithDirectIO) {
Options options = GetDefaultOptions();
// First, create an external SST file ["b"].
const std::string file_path = dbname_ + "/test.sst";
{
SstFileWriter sst_file_writer(EnvOptions(), GetDefaultOptions());
Status s = sst_file_writer.Open(file_path);
ASSERT_OK(s);
ASSERT_OK(sst_file_writer.Put("b", "b_value"));
ASSERT_OK(sst_file_writer.Finish());
}
options.enable_blob_files = true;
options.min_blob_size = 1000;
options.use_direct_reads = true;
options.allow_ingest_behind = true;
// Open DB with a fixed-prefix sst-partitioner so that compaction will cut a
// new table file when encountering a new key whose 1-byte prefix changes.
constexpr size_t key_len = 1;
options.sst_partitioner_factory =
NewSstPartitionerFixedPrefixFactory(key_len);
Status s = TryReopen(options);
if (s.IsInvalidArgument()) {
ROCKSDB_GTEST_SKIP("This test requires direct IO support");
return;
}
ASSERT_OK(s);
constexpr size_t num_keys = 3;
constexpr size_t blob_size = 3000;
constexpr char first_key[] = "a";
const std::string first_blob(blob_size, 'a');
ASSERT_OK(Put(first_key, first_blob));
constexpr char second_key[] = "b";
const std::string second_blob(2 * blob_size, 'b');
ASSERT_OK(Put(second_key, second_blob));
constexpr char third_key[] = "d";
const std::string third_blob(blob_size, 'd');
ASSERT_OK(Put(third_key, third_blob));
// first_blob, second_blob and third_blob in the same blob file.
// SST Blob file
// L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'|
// | | | ^ ^ ^
// | | | | | |
// | | +---------|-------|--------+
// | +-----------------|-------+
// +-------------------------+
ASSERT_OK(Flush());
constexpr char fourth_key[] = "c";
const std::string fourth_blob(blob_size, 'c');
ASSERT_OK(Put(fourth_key, fourth_blob));
// fourth_blob in another blob file.
// SST Blob file SST Blob file
// L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'| ["c"] |'cccc'|
// | | | ^ ^ ^ | ^
// | | | | | | | |
// | | +---------|-------|--------+ +-------+
// | +-----------------|-------+
// +-------------------------+
ASSERT_OK(Flush());
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
/*end=*/nullptr));
// Due to the above sst partitioner, we get 4 L1 files. The blob files are
// unchanged.
// |'aaaa', 'bbbb', 'dddd'| |'cccc'|
// ^ ^ ^ ^
// | | | |
// L0 | | | |
// L1 ["a"] ["b"] ["c"] | | ["d"] |
// | | | | | |
// | | +---------|-------|---------------+
// | +-----------------|-------+
// +-------------------------+
ASSERT_EQ(4, NumTableFilesAtLevel(/*level=*/1));
{
// Ingest the external SST file into bottommost level.
std::vector<std::string> ext_files{file_path};
IngestExternalFileOptions opts;
opts.ingest_behind = true;
ASSERT_OK(
db_->IngestExternalFile(db_->DefaultColumnFamily(), ext_files, opts));
}
// Now the database becomes as follows.
// |'aaaa', 'bbbb', 'dddd'| |'cccc'|
// ^ ^ ^ ^
// | | | |
// L0 | | | |
// L1 ["a"] ["b"] ["c"] | | ["d"] |
// | | | | | |
// | | +---------|-------|---------------+
// | +-----------------|-------+
// +-------------------------+
//
// L6 ["b"]
{
// Compact ["b"] to bottommost level.
Slice begin = Slice(second_key);
Slice end = Slice(second_key);
CompactRangeOptions cro;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
ASSERT_OK(db_->CompactRange(cro, &begin, &end));
}
// |'aaaa', 'bbbb', 'dddd'| |'cccc'|
// ^ ^ ^ ^
// | | | |
// L0 | | | |
// L1 ["a"] ["c"] | | ["d"] |
// | | | | |
// | +---------|-------|---------------+
// | +-----------------|-------+
// +-------|-----------------+
// |
// L6 ["b"]
ASSERT_EQ(3, NumTableFilesAtLevel(/*level=*/1));
ASSERT_EQ(1, NumTableFilesAtLevel(/*level=*/6));
bool called = false;
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->SetCallBack(
"RandomAccessFileReader::MultiRead:AlignedReqs", [&](void* arg) {
auto* aligned_reqs = static_cast<std::vector<FSReadRequest>*>(arg);
assert(aligned_reqs);
ASSERT_EQ(1, aligned_reqs->size());
called = true;
});
SyncPoint::GetInstance()->EnableProcessing();
std::array<Slice, num_keys> keys{{first_key, third_key, second_key}};
{
std::array<PinnableSlice, num_keys> values;
std::array<Status, num_keys> statuses;
// The MultiGet(), when constructing the KeyContexts, will process the keys
// in such order: a, d, b. The reason is that ["a"] and ["d"] are in L1,
// while ["b"] resides in L6.
// Consequently, the original FSReadRequest list prepared by
// Version::MultiGetBlob() will be for "a", "d" and "b". It is unsorted as
// follows:
//
// ["a", offset=30, len=3033],
// ["d", offset=9096, len=3033],
// ["b", offset=3063, len=6033]
//
// If we do not sort them before calling MultiRead() in DirectIO, then the
// underlying IO merging logic will yield two requests.
//
// [offset=0, len=4096] (for "a")
// [offset=0, len=12288] (result of merging the request for "d" and "b")
//
// We need to sort them in Version::MultiGetBlob() so that the underlying
// IO merging logic in DirectIO mode works as expected. The correct
// behavior will be one aligned request:
//
// [offset=0, len=12288]
db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys, &keys[0],
&values[0], &statuses[0]);
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
ASSERT_TRUE(called);
ASSERT_OK(statuses[0]);
ASSERT_EQ(values[0], first_blob);
ASSERT_OK(statuses[1]);
ASSERT_EQ(values[1], third_blob);
ASSERT_OK(statuses[2]);
ASSERT_EQ(values[2], second_blob);
}
}
#endif // !ROCKSDB_LITE
TEST_F(DBBlobBasicTest, MultiGetBlobsFromMultipleFiles) {
Options options = GetDefaultOptions();
options.enable_blob_files = true;
options.min_blob_size = 0;
Reopen(options);
constexpr size_t kNumBlobFiles = 3;
constexpr size_t kNumBlobsPerFile = 3;
constexpr size_t kNumKeys = kNumBlobsPerFile * kNumBlobFiles;
std::vector<std::string> key_strs;
std::vector<std::string> value_strs;
for (size_t i = 0; i < kNumBlobFiles; ++i) {
for (size_t j = 0; j < kNumBlobsPerFile; ++j) {
std::string key = "key" + std::to_string(i) + "_" + std::to_string(j);
std::string value =
"value_as_blob" + std::to_string(i) + "_" + std::to_string(j);
ASSERT_OK(Put(key, value));
key_strs.push_back(key);
value_strs.push_back(value);
}
ASSERT_OK(Flush());
}
assert(key_strs.size() == kNumKeys);
std::array<Slice, kNumKeys> keys;
for (size_t i = 0; i < keys.size(); ++i) {
keys[i] = key_strs[i];
}
std::array<PinnableSlice, kNumKeys> values;
std::array<Status, kNumKeys> statuses;
db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), kNumKeys, &keys[0],
&values[0], &statuses[0]);
for (size_t i = 0; i < kNumKeys; ++i) {
ASSERT_OK(statuses[i]);
ASSERT_EQ(value_strs[i], values[i]);
}
}
TEST_F(DBBlobBasicTest, GetBlob_CorruptIndex) {
Options options = GetDefaultOptions();
options.enable_blob_files = true;
@ -366,116 +133,19 @@ TEST_F(DBBlobBasicTest, GetBlob_CorruptIndex) {
Reopen(options);
constexpr char key[] = "key";
constexpr char blob[] = "blob";
ASSERT_OK(Put(key, blob));
// Fake a corrupt blob index.
const std::string blob_index("foobar");
WriteBatch batch;
ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
ASSERT_OK(db_->Write(WriteOptions(), &batch));
ASSERT_OK(Flush());
SyncPoint::GetInstance()->SetCallBack(
"Version::Get::TamperWithBlobIndex", [](void* arg) {
Slice* const blob_index = static_cast<Slice*>(arg);
assert(blob_index);
assert(!blob_index->empty());
blob_index->remove_prefix(1);
});
SyncPoint::GetInstance()->EnableProcessing();
PinnableSlice result;
ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
.IsCorruption());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
TEST_F(DBBlobBasicTest, MultiGetBlob_CorruptIndex) {
Options options = GetDefaultOptions();
options.enable_blob_files = true;
options.min_blob_size = 0;
options.create_if_missing = true;
DestroyAndReopen(options);
constexpr size_t kNumOfKeys = 3;
std::array<std::string, kNumOfKeys> key_strs;
std::array<std::string, kNumOfKeys> value_strs;
std::array<Slice, kNumOfKeys + 1> keys;
for (size_t i = 0; i < kNumOfKeys; ++i) {
key_strs[i] = "foo" + std::to_string(i);
value_strs[i] = "blob_value" + std::to_string(i);
ASSERT_OK(Put(key_strs[i], value_strs[i]));
keys[i] = key_strs[i];
}
constexpr char key[] = "key";
constexpr char blob[] = "blob";
ASSERT_OK(Put(key, blob));
keys[kNumOfKeys] = key;
ASSERT_OK(Flush());
SyncPoint::GetInstance()->SetCallBack(
"Version::MultiGet::TamperWithBlobIndex", [&key](void* arg) {
KeyContext* const key_context = static_cast<KeyContext*>(arg);
assert(key_context);
assert(key_context->key);
if (*(key_context->key) == key) {
Slice* const blob_index = key_context->value;
assert(blob_index);
assert(!blob_index->empty());
blob_index->remove_prefix(1);
}
});
SyncPoint::GetInstance()->EnableProcessing();
std::array<PinnableSlice, kNumOfKeys + 1> values;
std::array<Status, kNumOfKeys + 1> statuses;
db_->MultiGet(ReadOptions(), dbfull()->DefaultColumnFamily(), kNumOfKeys + 1,
keys.data(), values.data(), statuses.data(),
/*sorted_input=*/false);
for (size_t i = 0; i < kNumOfKeys + 1; ++i) {
if (i != kNumOfKeys) {
ASSERT_OK(statuses[i]);
ASSERT_EQ("blob_value" + std::to_string(i), values[i]);
} else {
ASSERT_TRUE(statuses[i].IsCorruption());
}
}
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
TEST_F(DBBlobBasicTest, MultiGetBlob_ExceedSoftLimit) {
Options options = GetDefaultOptions();
options.enable_blob_files = true;
options.min_blob_size = 0;
Reopen(options);
constexpr size_t kNumOfKeys = 3;
std::array<std::string, kNumOfKeys> key_bufs;
std::array<std::string, kNumOfKeys> value_bufs;
std::array<Slice, kNumOfKeys> keys;
for (size_t i = 0; i < kNumOfKeys; ++i) {
key_bufs[i] = "foo" + std::to_string(i);
value_bufs[i] = "blob_value" + std::to_string(i);
ASSERT_OK(Put(key_bufs[i], value_bufs[i]));
keys[i] = key_bufs[i];
}
ASSERT_OK(Flush());
std::array<PinnableSlice, kNumOfKeys> values;
std::array<Status, kNumOfKeys> statuses;
ReadOptions read_opts;
read_opts.value_size_soft_limit = 1;
db_->MultiGet(read_opts, dbfull()->DefaultColumnFamily(), kNumOfKeys,
keys.data(), values.data(), statuses.data(),
/*sorted_input=*/true);
for (const auto& s : statuses) {
ASSERT_TRUE(s.IsAborted());
}
}
TEST_F(DBBlobBasicTest, GetBlob_InlinedTTLIndex) {
@ -697,171 +367,6 @@ TEST_F(DBBlobBasicTest, MultiGetMergeBlobWithPut) {
ASSERT_EQ(values[2], "v2_0");
}
#ifndef ROCKSDB_LITE
TEST_F(DBBlobBasicTest, Properties) {
Options options = GetDefaultOptions();
options.enable_blob_files = true;
options.min_blob_size = 0;
Reopen(options);
constexpr char key1[] = "key1";
constexpr size_t key1_size = sizeof(key1) - 1;
constexpr char key2[] = "key2";
constexpr size_t key2_size = sizeof(key2) - 1;
constexpr char key3[] = "key3";
constexpr size_t key3_size = sizeof(key3) - 1;
constexpr char blob[] = "00000000000000";
constexpr size_t blob_size = sizeof(blob) - 1;
constexpr char longer_blob[] = "00000000000000000000";
constexpr size_t longer_blob_size = sizeof(longer_blob) - 1;
ASSERT_OK(Put(key1, blob));
ASSERT_OK(Put(key2, longer_blob));
ASSERT_OK(Flush());
constexpr size_t first_blob_file_expected_size =
BlobLogHeader::kSize +
BlobLogRecord::CalculateAdjustmentForRecordHeader(key1_size) + blob_size +
BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size) +
longer_blob_size + BlobLogFooter::kSize;
ASSERT_OK(Put(key3, blob));
ASSERT_OK(Flush());
constexpr size_t second_blob_file_expected_size =
BlobLogHeader::kSize +
BlobLogRecord::CalculateAdjustmentForRecordHeader(key3_size) + blob_size +
BlobLogFooter::kSize;
constexpr size_t total_expected_size =
first_blob_file_expected_size + second_blob_file_expected_size;
// Number of blob files
uint64_t num_blob_files = 0;
ASSERT_TRUE(
db_->GetIntProperty(DB::Properties::kNumBlobFiles, &num_blob_files));
ASSERT_EQ(num_blob_files, 2);
// Total size of live blob files
uint64_t live_blob_file_size = 0;
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileSize,
&live_blob_file_size));
ASSERT_EQ(live_blob_file_size, total_expected_size);
// Total amount of garbage in live blob files
{
uint64_t live_blob_file_garbage_size = 0;
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize,
&live_blob_file_garbage_size));
ASSERT_EQ(live_blob_file_garbage_size, 0);
}
// Total size of all blob files across all versions
// Note: this should be the same as above since we only have one
// version at this point.
uint64_t total_blob_file_size = 0;
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize,
&total_blob_file_size));
ASSERT_EQ(total_blob_file_size, total_expected_size);
// Delete key2 to create some garbage
ASSERT_OK(Delete(key2));
ASSERT_OK(Flush());
constexpr Slice* begin = nullptr;
constexpr Slice* end = nullptr;
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
constexpr size_t expected_garbage_size =
BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size) +
longer_blob_size;
constexpr double expected_space_amp =
static_cast<double>(total_expected_size) /
(total_expected_size - expected_garbage_size);
// Blob file stats
std::string blob_stats;
ASSERT_TRUE(db_->GetProperty(DB::Properties::kBlobStats, &blob_stats));
std::ostringstream oss;
oss << "Number of blob files: 2\nTotal size of blob files: "
<< total_expected_size
<< "\nTotal size of garbage in blob files: " << expected_garbage_size
<< "\nBlob file space amplification: " << expected_space_amp << '\n';
ASSERT_EQ(blob_stats, oss.str());
// Total amount of garbage in live blob files
{
uint64_t live_blob_file_garbage_size = 0;
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize,
&live_blob_file_garbage_size));
ASSERT_EQ(live_blob_file_garbage_size, expected_garbage_size);
}
}
TEST_F(DBBlobBasicTest, PropertiesMultiVersion) {
Options options = GetDefaultOptions();
options.enable_blob_files = true;
options.min_blob_size = 0;
Reopen(options);
constexpr char key1[] = "key1";
constexpr char key2[] = "key2";
constexpr char key3[] = "key3";
constexpr size_t key_size = sizeof(key1) - 1;
static_assert(sizeof(key2) - 1 == key_size, "unexpected size: key2");
static_assert(sizeof(key3) - 1 == key_size, "unexpected size: key3");
constexpr char blob[] = "0000000000";
constexpr size_t blob_size = sizeof(blob) - 1;
ASSERT_OK(Put(key1, blob));
ASSERT_OK(Flush());
ASSERT_OK(Put(key2, blob));
ASSERT_OK(Flush());
// Create an iterator to keep the current version alive
std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
ASSERT_OK(iter->status());
// Note: the Delete and subsequent compaction result in the first blob file
// not making it to the final version. (It is still part of the previous
// version kept alive by the iterator though.) On the other hand, the Put
// results in a third blob file.
ASSERT_OK(Delete(key1));
ASSERT_OK(Put(key3, blob));
ASSERT_OK(Flush());
constexpr Slice* begin = nullptr;
constexpr Slice* end = nullptr;
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
// Total size of all blob files across all versions: between the two versions,
// we should have three blob files of the same size with one blob each.
// The version kept alive by the iterator contains the first and the second
// blob file, while the final version contains the second and the third blob
// file. (The second blob file is thus shared by the two versions but should
// be counted only once.)
uint64_t total_blob_file_size = 0;
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize,
&total_blob_file_size));
ASSERT_EQ(total_blob_file_size,
3 * (BlobLogHeader::kSize +
BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) +
blob_size + BlobLogFooter::kSize));
}
#endif // !ROCKSDB_LITE
class DBBlobBasicIOErrorTest : public DBBlobBasicTest,
public testing::WithParamInterface<std::string> {
protected:
@ -874,21 +379,11 @@ class DBBlobBasicIOErrorTest : public DBBlobBasicTest,
std::string sync_point_;
};
class DBBlobBasicIOErrorMultiGetTest : public DBBlobBasicIOErrorTest {
public:
DBBlobBasicIOErrorMultiGetTest() : DBBlobBasicIOErrorTest() {}
};
INSTANTIATE_TEST_CASE_P(DBBlobBasicTest, DBBlobBasicIOErrorTest,
::testing::ValuesIn(std::vector<std::string>{
"BlobFileReader::OpenFile:NewRandomAccessFile",
"BlobFileReader::GetBlob:ReadFromFile"}));
INSTANTIATE_TEST_CASE_P(DBBlobBasicTest, DBBlobBasicIOErrorMultiGetTest,
::testing::ValuesIn(std::vector<std::string>{
"BlobFileReader::OpenFile:NewRandomAccessFile",
"BlobFileReader::MultiGetBlob:ReadFromFile"}));
TEST_P(DBBlobBasicIOErrorTest, GetBlob_IOError) {
Options options;
options.env = fault_injection_env_.get();
@ -918,7 +413,7 @@ TEST_P(DBBlobBasicIOErrorTest, GetBlob_IOError) {
SyncPoint::GetInstance()->ClearAllCallBacks();
}
TEST_P(DBBlobBasicIOErrorMultiGetTest, MultiGetBlobs_IOError) {
TEST_P(DBBlobBasicIOErrorTest, MultiGetBlobs_IOError) {
Options options = GetDefaultOptions();
options.env = fault_injection_env_.get();
options.enable_blob_files = true;
@ -960,53 +455,6 @@ TEST_P(DBBlobBasicIOErrorMultiGetTest, MultiGetBlobs_IOError) {
ASSERT_TRUE(statuses[1].IsIOError());
}
TEST_P(DBBlobBasicIOErrorMultiGetTest, MultipleBlobFiles) {
Options options = GetDefaultOptions();
options.env = fault_injection_env_.get();
options.enable_blob_files = true;
options.min_blob_size = 0;
Reopen(options);
constexpr size_t num_keys = 2;
constexpr char key1[] = "key1";
constexpr char value1[] = "blob1";
ASSERT_OK(Put(key1, value1));
ASSERT_OK(Flush());
constexpr char key2[] = "key2";
constexpr char value2[] = "blob2";
ASSERT_OK(Put(key2, value2));
ASSERT_OK(Flush());
std::array<Slice, num_keys> keys{{key1, key2}};
std::array<PinnableSlice, num_keys> values;
std::array<Status, num_keys> statuses;
bool first_blob_file = true;
SyncPoint::GetInstance()->SetCallBack(
sync_point_, [&first_blob_file, this](void* /* arg */) {
if (first_blob_file) {
first_blob_file = false;
return;
}
fault_injection_env_->SetFilesystemActive(false,
Status::IOError(sync_point_));
});
SyncPoint::GetInstance()->EnableProcessing();
db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
keys.data(), values.data(), statuses.data());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
ASSERT_OK(statuses[0]);
ASSERT_EQ(value1, values[0]);
ASSERT_TRUE(statuses[1].IsIOError());
}
namespace {
class ReadBlobCompactionFilter : public CompactionFilter {
@ -1065,6 +513,5 @@ TEST_P(DBBlobBasicIOErrorTest, CompactionFilterReadBlob_IOError) {
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
RegisterCustomObjects(argc, argv);
return RUN_ALL_TESTS();
}


@ -8,6 +8,7 @@
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "test_util/sync_point.h"
#include "utilities/fault_injection_env.h"
namespace ROCKSDB_NAMESPACE {
@ -18,7 +19,7 @@ class DBBlobCompactionTest : public DBTestBase {
#ifndef ROCKSDB_LITE
const std::vector<InternalStats::CompactionStats>& GetCompactionStats() {
VersionSet* const versions = dbfull()->GetVersionSet();
VersionSet* const versions = dbfull()->TEST_GetVersionSet();
assert(versions);
assert(versions->GetColumnFamilySet());
@ -415,30 +416,16 @@ TEST_F(DBBlobCompactionTest, CorruptedBlobIndex) {
new ValueMutationFilter(""));
options.compaction_filter = compaction_filter_guard.get();
DestroyAndReopen(options);
// Mock a corrupted blob index
constexpr char key[] = "key";
constexpr char blob[] = "blob";
ASSERT_OK(Put(key, blob));
std::string blob_idx("blob_idx");
WriteBatch write_batch;
ASSERT_OK(WriteBatchInternal::PutBlobIndex(&write_batch, 0, key, blob_idx));
ASSERT_OK(db_->Write(WriteOptions(), &write_batch));
ASSERT_OK(Flush());
SyncPoint::GetInstance()->SetCallBack(
"CompactionIterator::InvokeFilterIfNeeded::TamperWithBlobIndex",
[](void* arg) {
Slice* const blob_index = static_cast<Slice*>(arg);
assert(blob_index);
assert(!blob_index->empty());
blob_index->remove_prefix(1);
});
SyncPoint::GetInstance()->EnableProcessing();
ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
/*end=*/nullptr)
.IsCorruption());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
Close();
}
@ -509,7 +496,7 @@ TEST_F(DBBlobCompactionTest, TrackGarbage) {
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
VersionSet* const versions = dbfull()->GetVersionSet();
VersionSet* const versions = dbfull()->TEST_GetVersionSet();
assert(versions);
assert(versions->GetColumnFamilySet());
@ -526,7 +513,8 @@ TEST_F(DBBlobCompactionTest, TrackGarbage) {
ASSERT_EQ(blob_files.size(), 2);
{
const auto& meta = blob_files.front();
auto it = blob_files.begin();
const auto& meta = it->second;
assert(meta);
constexpr uint64_t first_expected_bytes =
@ -556,7 +544,8 @@ TEST_F(DBBlobCompactionTest, TrackGarbage) {
}
{
const auto& meta = blob_files.back();
auto it = blob_files.rbegin();
const auto& meta = it->second;
assert(meta);
constexpr uint64_t new_first_expected_bytes =
@ -602,129 +591,10 @@ TEST_F(DBBlobCompactionTest, MergeBlobWithBase) {
Close();
}
TEST_F(DBBlobCompactionTest, CompactionReadaheadGarbageCollection) {
Options options = GetDefaultOptions();
options.enable_blob_files = true;
options.min_blob_size = 0;
options.enable_blob_garbage_collection = true;
options.blob_garbage_collection_age_cutoff = 1.0;
options.blob_compaction_readahead_size = 1 << 10;
options.disable_auto_compactions = true;
Reopen(options);
ASSERT_OK(Put("key", "lime"));
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Flush());
ASSERT_OK(Put("key", "pie"));
ASSERT_OK(Put("foo", "baz"));
ASSERT_OK(Flush());
size_t num_non_prefetch_reads = 0;
SyncPoint::GetInstance()->SetCallBack(
"BlobFileReader::GetBlob:ReadFromFile",
[&num_non_prefetch_reads](void* /* arg */) { ++num_non_prefetch_reads; });
SyncPoint::GetInstance()->EnableProcessing();
constexpr Slice* begin = nullptr;
constexpr Slice* end = nullptr;
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
ASSERT_EQ(Get("key"), "pie");
ASSERT_EQ(Get("foo"), "baz");
ASSERT_EQ(num_non_prefetch_reads, 0);
Close();
}
TEST_F(DBBlobCompactionTest, CompactionReadaheadFilter) {
Options options = GetDefaultOptions();
std::unique_ptr<CompactionFilter> compaction_filter_guard(
new ValueMutationFilter("pie"));
options.compaction_filter = compaction_filter_guard.get();
options.enable_blob_files = true;
options.min_blob_size = 0;
options.blob_compaction_readahead_size = 1 << 10;
options.disable_auto_compactions = true;
Reopen(options);
ASSERT_OK(Put("key", "lime"));
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Flush());
size_t num_non_prefetch_reads = 0;
SyncPoint::GetInstance()->SetCallBack(
"BlobFileReader::GetBlob:ReadFromFile",
[&num_non_prefetch_reads](void* /* arg */) { ++num_non_prefetch_reads; });
SyncPoint::GetInstance()->EnableProcessing();
constexpr Slice* begin = nullptr;
constexpr Slice* end = nullptr;
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
ASSERT_EQ(Get("key"), "limepie");
ASSERT_EQ(Get("foo"), "barpie");
ASSERT_EQ(num_non_prefetch_reads, 0);
Close();
}
TEST_F(DBBlobCompactionTest, CompactionReadaheadMerge) {
Options options = GetDefaultOptions();
options.enable_blob_files = true;
options.min_blob_size = 0;
options.blob_compaction_readahead_size = 1 << 10;
options.merge_operator = MergeOperators::CreateStringAppendOperator();
options.disable_auto_compactions = true;
Reopen(options);
ASSERT_OK(Put("key", "lime"));
ASSERT_OK(Put("foo", "bar"));
ASSERT_OK(Flush());
ASSERT_OK(Merge("key", "pie"));
ASSERT_OK(Merge("foo", "baz"));
ASSERT_OK(Flush());
size_t num_non_prefetch_reads = 0;
SyncPoint::GetInstance()->SetCallBack(
"BlobFileReader::GetBlob:ReadFromFile",
[&num_non_prefetch_reads](void* /* arg */) { ++num_non_prefetch_reads; });
SyncPoint::GetInstance()->EnableProcessing();
constexpr Slice* begin = nullptr;
constexpr Slice* end = nullptr;
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
ASSERT_EQ(Get("key"), "lime,pie");
ASSERT_EQ(Get("foo"), "bar,baz");
ASSERT_EQ(num_non_prefetch_reads, 0);
Close();
}
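The three readahead tests above share one pattern: every value is forced into a blob file, and a non-zero blob_compaction_readahead_size routes compaction-time blob reads (plain garbage collection, compaction filters, and merges alike) through a prefetch buffer, which is why the BlobFileReader::GetBlob:ReadFromFile sync point never fires. A minimal configuration sketch, assuming defaults elsewhere:

Options options;
options.enable_blob_files = true;                  // separate values into blob files
options.min_blob_size = 0;                         // treat every value as a blob
options.blob_compaction_readahead_size = 1 << 10;  // enable compaction readahead
options.disable_auto_compactions = true;           // drive compaction manually in tests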
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
RegisterCustomObjects(argc, argv);
return RUN_ALL_TESTS();
}

@ -77,6 +77,5 @@ TEST_F(DBBlobCorruptionTest, VerifyWholeBlobFileChecksum) {
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
RegisterCustomObjects(argc, argv);
return RUN_ALL_TESTS();
}

@ -13,7 +13,6 @@
#include <vector>
#include "db/arena_wrapped_db_iter.h"
#include "db/blob/blob_index.h"
#include "db/column_family.h"
#include "db/db_iter.h"
#include "db/db_test_util.h"
@ -139,39 +138,20 @@ class DBBlobIndexTest : public DBTestBase {
}
};
// Note: the following test case pertains to the StackableDB-based BlobDB
// implementation. We should be able to write kTypeBlobIndex to memtables and
// SST files.
// Should be able to write kTypeBlobIndex to memtables and SST files.
TEST_F(DBBlobIndexTest, Write) {
for (auto tier : kAllTiers) {
DestroyAndReopen(GetTestOptions());
std::vector<std::pair<std::string, std::string>> key_values;
constexpr size_t num_key_values = 5;
key_values.reserve(num_key_values);
for (size_t i = 1; i <= num_key_values; ++i) {
std::string key = "key" + std::to_string(i);
std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210,
"blob" + std::to_string(i));
key_values.emplace_back(std::move(key), std::move(blob_index));
}
for (const auto& key_value : key_values) {
for (int i = 1; i <= 5; i++) {
std::string index = ToString(i);
WriteBatch batch;
ASSERT_OK(PutBlobIndex(&batch, key_value.first, key_value.second));
ASSERT_OK(PutBlobIndex(&batch, "key" + index, "blob" + index));
ASSERT_OK(Write(&batch));
}
MoveDataTo(tier);
for (const auto& key_value : key_values) {
ASSERT_EQ(GetBlobIndex(key_value.first), key_value.second);
for (int i = 1; i <= 5; i++) {
std::string index = ToString(i);
ASSERT_EQ("blob" + index, GetBlobIndex("key" + index));
}
}
}
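For reference, the newer side of this hunk constructs a real inlined-TTL blob index instead of an opaque string; condensed, the pattern is the following (both helpers appear verbatim elsewhere in this diff, and the expiration value is the one used in these tests):

std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210, "blob1");

WriteBatch batch;
// The second argument is the column family id (0 = default).
ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, "key1", blob_index));
ASSERT_OK(db_->Write(WriteOptions(), &batch));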
@ -184,19 +164,13 @@ TEST_F(DBBlobIndexTest, Write) {
// accidentally opening the base DB of a stacked BlobDB and actual corruption
// when using the integrated BlobDB.
TEST_F(DBBlobIndexTest, Get) {
std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210, "blob");
for (auto tier : kAllTiers) {
DestroyAndReopen(GetTestOptions());
WriteBatch batch;
ASSERT_OK(batch.Put("key", "value"));
ASSERT_OK(PutBlobIndex(&batch, "blob_key", blob_index));
ASSERT_OK(PutBlobIndex(&batch, "blob_key", "blob_index"));
ASSERT_OK(Write(&batch));
MoveDataTo(tier);
// Verify normal value
bool is_blob_index = false;
PinnableSlice value;
@ -204,7 +178,6 @@ TEST_F(DBBlobIndexTest, Get) {
ASSERT_EQ("value", GetImpl("key"));
ASSERT_EQ("value", GetImpl("key", &is_blob_index));
ASSERT_FALSE(is_blob_index);
// Verify blob index
if (tier <= kImmutableMemtables) {
ASSERT_TRUE(Get("blob_key", &value).IsNotSupported());
@ -213,7 +186,7 @@ TEST_F(DBBlobIndexTest, Get) {
ASSERT_TRUE(Get("blob_key", &value).IsCorruption());
ASSERT_EQ("CORRUPTION", GetImpl("blob_key"));
}
ASSERT_EQ(blob_index, GetImpl("blob_key", &is_blob_index));
ASSERT_EQ("blob_index", GetImpl("blob_key", &is_blob_index));
ASSERT_TRUE(is_blob_index);
}
}
@ -223,14 +196,11 @@ TEST_F(DBBlobIndexTest, Get) {
// if blob index is updated with a normal value. See the test case above for
// more details.
TEST_F(DBBlobIndexTest, Updated) {
std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210, "blob");
for (auto tier : kAllTiers) {
DestroyAndReopen(GetTestOptions());
WriteBatch batch;
for (int i = 0; i < 10; i++) {
ASSERT_OK(PutBlobIndex(&batch, "key" + std::to_string(i), blob_index));
ASSERT_OK(PutBlobIndex(&batch, "key" + ToString(i), "blob_index"));
}
ASSERT_OK(Write(&batch));
// Prevent the blob values from being purged.
@ -248,7 +218,7 @@ TEST_F(DBBlobIndexTest, Updated) {
ASSERT_OK(dbfull()->DeleteRange(WriteOptions(), cfh(), "key6", "key9"));
MoveDataTo(tier);
for (int i = 0; i < 10; i++) {
ASSERT_EQ(blob_index, GetBlobIndex("key" + std::to_string(i), snapshot));
ASSERT_EQ("blob_index", GetBlobIndex("key" + ToString(i), snapshot));
}
ASSERT_EQ("new_value", Get("key1"));
if (tier <= kImmutableMemtables) {
@ -260,9 +230,9 @@ TEST_F(DBBlobIndexTest, Updated) {
ASSERT_EQ("NOT_FOUND", Get("key4"));
ASSERT_EQ("a,b,c", GetImpl("key5"));
for (int i = 6; i < 9; i++) {
ASSERT_EQ("NOT_FOUND", Get("key" + std::to_string(i)));
ASSERT_EQ("NOT_FOUND", Get("key" + ToString(i)));
}
ASSERT_EQ(blob_index, GetBlobIndex("key9"));
ASSERT_EQ("blob_index", GetBlobIndex("key9"));
dbfull()->ReleaseSnapshot(snapshot);
}
}
@ -301,7 +271,7 @@ TEST_F(DBBlobIndexTest, Iterate) {
};
auto get_value = [&](int index, int version) {
return get_key(index) + "_value" + std::to_string(version);
return get_key(index) + "_value" + ToString(version);
};
auto check_iterator = [&](Iterator* iterator, Status::Code expected_status,
@ -501,7 +471,7 @@ TEST_F(DBBlobIndexTest, IntegratedBlobIterate) {
auto get_key = [](size_t index) { return ("key" + std::to_string(index)); };
auto get_value = [&](size_t index, size_t version) {
return get_key(index) + "_value" + std::to_string(version);
return get_key(index) + "_value" + ToString(version);
};
auto check_iterator = [&](Iterator* iterator, Status expected_status,
@ -597,6 +567,5 @@ TEST_F(DBBlobIndexTest, IntegratedBlobIterate) {
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
RegisterCustomObjects(argc, argv);
return RUN_ALL_TESTS();
}

@ -1,21 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/prefetch_buffer_collection.h"
namespace ROCKSDB_NAMESPACE {
FilePrefetchBuffer* PrefetchBufferCollection::GetOrCreatePrefetchBuffer(
uint64_t file_number) {
auto& prefetch_buffer = prefetch_buffers_[file_number];
if (!prefetch_buffer) {
prefetch_buffer.reset(
new FilePrefetchBuffer(readahead_size_, readahead_size_));
}
return prefetch_buffer.get();
}
} // namespace ROCKSDB_NAMESPACE

@ -1,38 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cassert>
#include <cstdint>
#include <memory>
#include <unordered_map>
#include "file/file_prefetch_buffer.h"
#include "rocksdb/rocksdb_namespace.h"
namespace ROCKSDB_NAMESPACE {
// A class that owns a collection of FilePrefetchBuffers using the file number
// as key. Used for implementing compaction readahead for blob files. Designed
// to be accessed by a single thread only: every (sub)compaction needs its own
// buffers since they are guaranteed to read different blobs from different
// positions even when reading the same file.
class PrefetchBufferCollection {
public:
explicit PrefetchBufferCollection(uint64_t readahead_size)
: readahead_size_(readahead_size) {
assert(readahead_size_ > 0);
}
FilePrefetchBuffer* GetOrCreatePrefetchBuffer(uint64_t file_number);
private:
uint64_t readahead_size_;
std::unordered_map<uint64_t, std::unique_ptr<FilePrefetchBuffer>>
prefetch_buffers_; // maps file number to prefetch buffer
};
} // namespace ROCKSDB_NAMESPACE
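A brief usage sketch for the class above, assuming the caller is a subcompaction that derives the readahead size from blob_compaction_readahead_size (the file number below is illustrative):

// One collection per (sub)compaction; intentionally not thread-safe.
PrefetchBufferCollection buffers(/* readahead_size */ 1 << 10);

constexpr uint64_t blob_file_number = 42;  // illustrative
FilePrefetchBuffer* prefetch_buffer =
    buffers.GetOrCreatePrefetchBuffer(blob_file_number);
// Repeated calls with the same file number return the same buffer, so
// sequential blob reads from one file share a single readahead window.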

@ -15,6 +15,7 @@
#include "db/blob/blob_file_builder.h"
#include "db/compaction/compaction_iterator.h"
#include "db/dbformat.h"
#include "db/event_helpers.h"
#include "db/internal_stats.h"
#include "db/merge_helper.h"
@ -62,10 +63,9 @@ Status BuildTable(
FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
std::vector<SequenceNumber> snapshots,
SequenceNumber earliest_write_conflict_snapshot,
SequenceNumber job_snapshot, SnapshotChecker* snapshot_checker,
bool paranoid_file_checks, InternalStats* internal_stats,
IOStatus* io_status, const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCreationReason blob_creation_reason, EventLogger* event_logger,
SnapshotChecker* snapshot_checker, bool paranoid_file_checks,
InternalStats* internal_stats, IOStatus* io_status,
const std::shared_ptr<IOTracer>& io_tracer, EventLogger* event_logger,
int job_id, const Env::IOPriority io_priority,
TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
const std::string* full_history_ts_low,
@ -115,7 +115,6 @@ Status BuildTable(
assert(fs);
TableProperties tp;
bool table_file_created = false;
if (iter->Valid() || !range_del_agg->IsEmpty()) {
std::unique_ptr<CompactionFilter> compaction_filter;
if (ioptions.compaction_filter_factory != nullptr &&
@ -159,8 +158,6 @@ Status BuildTable(
file_checksum_func_name);
return s;
}
table_file_created = true;
FileTypeSet tmp_set = ioptions.checksum_handoff_file_types;
file->SetIOPriority(io_priority);
file->SetWriteLifeTimeHint(write_hint);
@ -181,25 +178,23 @@ Status BuildTable(
std::unique_ptr<BlobFileBuilder> blob_file_builder(
(mutable_cf_options.enable_blob_files && blob_file_additions)
? new BlobFileBuilder(
versions, fs, &ioptions, &mutable_cf_options, &file_options,
job_id, tboptions.column_family_id,
tboptions.column_family_name, io_priority, write_hint,
io_tracer, blob_callback, blob_creation_reason,
? new BlobFileBuilder(versions, fs, &ioptions, &mutable_cf_options,
&file_options, job_id,
tboptions.column_family_id,
tboptions.column_family_name, io_priority,
write_hint, io_tracer, blob_callback,
&blob_file_paths, blob_file_additions)
: nullptr);
CompactionIterator c_iter(
iter, tboptions.internal_comparator.user_comparator(), &merge,
kMaxSequenceNumber, &snapshots, earliest_write_conflict_snapshot,
job_snapshot, snapshot_checker, env,
ShouldReportDetailedTime(env, ioptions.stats),
snapshot_checker, env, ShouldReportDetailedTime(env, ioptions.stats),
true /* internal key corruption is not ok */, range_del_agg.get(),
blob_file_builder.get(), ioptions.allow_data_in_errors,
ioptions.enforce_single_del_contracts,
/*compaction=*/nullptr, compaction_filter.get(),
/*shutting_down=*/nullptr,
/*manual_compaction_paused=*/nullptr,
/*preserve_deletes_seqnum=*/0, /*manual_compaction_paused=*/nullptr,
/*manual_compaction_canceled=*/nullptr, db_options.info_log,
full_history_ts_low);
@ -216,11 +211,7 @@ Status BuildTable(
break;
}
builder->Add(key, value);
s = meta->UpdateBoundaries(key, value, ikey.sequence, ikey.type);
if (!s.ok()) {
break;
}
meta->UpdateBoundaries(key, value, ikey.sequence, ikey.type);
// TODO(noetzli): Update stats after flush, too.
if (io_priority == Env::IO_HIGH &&
@ -320,14 +311,13 @@ Status BuildTable(
if (s.ok()) {
s = blob_file_builder->Finish();
} else {
blob_file_builder->Abandon(s);
blob_file_builder->Abandon();
}
blob_file_builder.reset();
}
// TODO: also check the IO status when creating the Iterator.
TEST_SYNC_POINT("BuildTable:BeforeOutputValidation");
if (s.ok() && !empty) {
// Verify that the table is usable
// We set for_compaction to false and don't OptimizeForCompactionTableRead
@ -338,8 +328,8 @@ Status BuildTable(
ReadOptions read_options;
std::unique_ptr<InternalIterator> it(table_cache->NewIterator(
read_options, file_options, tboptions.internal_comparator, *meta,
nullptr /* range_del_agg */, mutable_cf_options.prefix_extractor,
nullptr,
nullptr /* range_del_agg */,
mutable_cf_options.prefix_extractor.get(), nullptr,
(internal_stats == nullptr) ? nullptr
: internal_stats->GetFileReadHist(0),
TableReaderCaller::kFlush, /*arena=*/nullptr,
@ -375,16 +365,14 @@ Status BuildTable(
constexpr IODebugContext* dbg = nullptr;
if (table_file_created) {
Status ignored = fs->DeleteFile(fname, IOOptions(), dbg);
ignored.PermitUncheckedError();
}
assert(blob_file_additions || blob_file_paths.empty());
if (blob_file_additions) {
for (const std::string& blob_file_path : blob_file_paths) {
Status ignored = DeleteDBFile(&db_options, blob_file_path, dbname,
ignored = DeleteDBFile(&db_options, blob_file_path, dbname,
/*force_bg=*/false, /*force_fg=*/false);
ignored.PermitUncheckedError();
TEST_SYNC_POINT("BuildTable::AfterDeleteFile");
@ -392,19 +380,14 @@ Status BuildTable(
}
}
Status status_for_listener = s;
if (meta->fd.GetFileSize() == 0) {
fname = "(nil)";
if (s.ok()) {
status_for_listener = Status::Aborted("Empty SST file not kept");
}
}
// Output to event logger and fire events.
EventHelpers::LogAndNotifyTableFileCreationFinished(
event_logger, ioptions.listeners, dbname, tboptions.column_family_name,
fname, job_id, meta->fd, meta->oldest_blob_file_number, tp,
tboptions.reason, status_for_listener, file_checksum,
file_checksum_func_name);
tboptions.reason, s, file_checksum, file_checksum_func_name);
return s;
}

@ -57,10 +57,9 @@ extern Status BuildTable(
FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
std::vector<SequenceNumber> snapshots,
SequenceNumber earliest_write_conflict_snapshot,
SequenceNumber job_snapshot, SnapshotChecker* snapshot_checker,
bool paranoid_file_checks, InternalStats* internal_stats,
IOStatus* io_status, const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCreationReason blob_creation_reason,
SnapshotChecker* snapshot_checker, bool paranoid_file_checks,
InternalStats* internal_stats, IOStatus* io_status,
const std::shared_ptr<IOTracer>& io_tracer,
EventLogger* event_logger = nullptr, int job_id = 0,
const Env::IOPriority io_priority = Env::IO_HIGH,
TableProperties* table_properties = nullptr,

db/c.cc

@ -35,7 +35,7 @@
#include "rocksdb/status.h"
#include "rocksdb/table.h"
#include "rocksdb/universal_compaction.h"
#include "rocksdb/utilities/backup_engine.h"
#include "rocksdb/utilities/backupable_db.h"
#include "rocksdb/utilities/checkpoint.h"
#include "rocksdb/utilities/db_ttl.h"
#include "rocksdb/utilities/memory_util.h"
@ -47,8 +47,8 @@
#include "rocksdb/write_batch.h"
#include "utilities/merge_operators.h"
using ROCKSDB_NAMESPACE::BackupableDBOptions;
using ROCKSDB_NAMESPACE::BackupEngine;
using ROCKSDB_NAMESPACE::BackupEngineOptions;
using ROCKSDB_NAMESPACE::BackupID;
using ROCKSDB_NAMESPACE::BackupInfo;
using ROCKSDB_NAMESPACE::BatchResult;
@ -189,8 +189,8 @@ struct rocksdb_transaction_options_t {
struct rocksdb_transaction_t {
Transaction* rep;
};
struct rocksdb_backup_engine_options_t {
BackupEngineOptions rep;
struct rocksdb_backupable_db_options_t {
BackupableDBOptions rep;
};
struct rocksdb_checkpoint_t {
Checkpoint* rep;
@ -290,10 +290,45 @@ struct rocksdb_filterpolicy_t : public FilterPolicy {
void* state_;
void (*destructor_)(void*);
const char* (*name_)(void*);
char* (*create_)(
void*,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length);
unsigned char (*key_match_)(
void*,
const char* key, size_t length,
const char* filter, size_t filter_length);
void (*delete_filter_)(
void*,
const char* filter, size_t filter_length);
~rocksdb_filterpolicy_t() override { (*destructor_)(state_); }
const char* Name() const override { return (*name_)(state_); }
void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
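// Marshal the keys into parallel pointer/length arrays so the
// user-supplied C callback can consume them.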
std::vector<const char*> key_pointers(n);
std::vector<size_t> key_sizes(n);
for (int i = 0; i < n; i++) {
key_pointers[i] = keys[i].data();
key_sizes[i] = keys[i].size();
}
size_t len;
char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len);
dst->append(filter, len);
if (delete_filter_ != nullptr) {
(*delete_filter_)(state_, filter, len);
} else {
free(filter);
}
}
bool KeyMayMatch(const Slice& key, const Slice& filter) const override {
return (*key_match_)(state_, key.data(), key.size(),
filter.data(), filter.size());
}
};
struct rocksdb_mergeoperator_t : public MergeOperator {
@ -512,9 +547,10 @@ rocksdb_t* rocksdb_open_as_secondary(const rocksdb_options_t* options,
rocksdb_backup_engine_t* rocksdb_backup_engine_open(
const rocksdb_options_t* options, const char* path, char** errptr) {
BackupEngine* be;
if (SaveError(errptr, BackupEngine::Open(
options->rep.env,
BackupEngineOptions(path, nullptr, true,
if (SaveError(errptr, BackupEngine::Open(options->rep.env,
BackupableDBOptions(path,
nullptr,
true,
options->rep.info_log.get()),
&be))) {
return nullptr;
@ -525,7 +561,7 @@ rocksdb_backup_engine_t* rocksdb_backup_engine_open(
}
rocksdb_backup_engine_t* rocksdb_backup_engine_open_opts(
const rocksdb_backup_engine_options_t* options, rocksdb_env_t* env,
const rocksdb_backupable_db_options_t* options, rocksdb_env_t* env,
char** errptr) {
BackupEngine* be;
if (SaveError(errptr, BackupEngine::Open(options->rep, env->rep, &be))) {
@ -632,125 +668,125 @@ void rocksdb_backup_engine_close(rocksdb_backup_engine_t* be) {
delete be;
}
rocksdb_backup_engine_options_t* rocksdb_backup_engine_options_create(
rocksdb_backupable_db_options_t* rocksdb_backupable_db_options_create(
const char* backup_dir) {
return new rocksdb_backup_engine_options_t{
BackupEngineOptions(std::string(backup_dir))};
return new rocksdb_backupable_db_options_t{
BackupableDBOptions(std::string(backup_dir))};
}
void rocksdb_backup_engine_options_set_backup_dir(
rocksdb_backup_engine_options_t* options, const char* backup_dir) {
void rocksdb_backupable_db_options_set_backup_dir(
rocksdb_backupable_db_options_t* options, const char* backup_dir) {
options->rep.backup_dir = std::string(backup_dir);
}
void rocksdb_backup_engine_options_set_env(
rocksdb_backup_engine_options_t* options, rocksdb_env_t* env) {
void rocksdb_backupable_db_options_set_env(
rocksdb_backupable_db_options_t* options, rocksdb_env_t* env) {
options->rep.backup_env = (env ? env->rep : nullptr);
}
void rocksdb_backup_engine_options_set_share_table_files(
rocksdb_backup_engine_options_t* options, unsigned char val) {
void rocksdb_backupable_db_options_set_share_table_files(
rocksdb_backupable_db_options_t* options, unsigned char val) {
options->rep.share_table_files = val;
}
unsigned char rocksdb_backup_engine_options_get_share_table_files(
rocksdb_backup_engine_options_t* options) {
unsigned char rocksdb_backupable_db_options_get_share_table_files(
rocksdb_backupable_db_options_t* options) {
return options->rep.share_table_files;
}
void rocksdb_backup_engine_options_set_sync(
rocksdb_backup_engine_options_t* options, unsigned char val) {
void rocksdb_backupable_db_options_set_sync(
rocksdb_backupable_db_options_t* options, unsigned char val) {
options->rep.sync = val;
}
unsigned char rocksdb_backup_engine_options_get_sync(
rocksdb_backup_engine_options_t* options) {
unsigned char rocksdb_backupable_db_options_get_sync(
rocksdb_backupable_db_options_t* options) {
return options->rep.sync;
}
void rocksdb_backup_engine_options_set_destroy_old_data(
rocksdb_backup_engine_options_t* options, unsigned char val) {
void rocksdb_backupable_db_options_set_destroy_old_data(
rocksdb_backupable_db_options_t* options, unsigned char val) {
options->rep.destroy_old_data = val;
}
unsigned char rocksdb_backup_engine_options_get_destroy_old_data(
rocksdb_backup_engine_options_t* options) {
unsigned char rocksdb_backupable_db_options_get_destroy_old_data(
rocksdb_backupable_db_options_t* options) {
return options->rep.destroy_old_data;
}
void rocksdb_backup_engine_options_set_backup_log_files(
rocksdb_backup_engine_options_t* options, unsigned char val) {
void rocksdb_backupable_db_options_set_backup_log_files(
rocksdb_backupable_db_options_t* options, unsigned char val) {
options->rep.backup_log_files = val;
}
unsigned char rocksdb_backup_engine_options_get_backup_log_files(
rocksdb_backup_engine_options_t* options) {
unsigned char rocksdb_backupable_db_options_get_backup_log_files(
rocksdb_backupable_db_options_t* options) {
return options->rep.backup_log_files;
}
void rocksdb_backup_engine_options_set_backup_rate_limit(
rocksdb_backup_engine_options_t* options, uint64_t limit) {
void rocksdb_backupable_db_options_set_backup_rate_limit(
rocksdb_backupable_db_options_t* options, uint64_t limit) {
options->rep.backup_rate_limit = limit;
}
uint64_t rocksdb_backup_engine_options_get_backup_rate_limit(
rocksdb_backup_engine_options_t* options) {
uint64_t rocksdb_backupable_db_options_get_backup_rate_limit(
rocksdb_backupable_db_options_t* options) {
return options->rep.backup_rate_limit;
}
void rocksdb_backup_engine_options_set_restore_rate_limit(
rocksdb_backup_engine_options_t* options, uint64_t limit) {
void rocksdb_backupable_db_options_set_restore_rate_limit(
rocksdb_backupable_db_options_t* options, uint64_t limit) {
options->rep.restore_rate_limit = limit;
}
uint64_t rocksdb_backup_engine_options_get_restore_rate_limit(
rocksdb_backup_engine_options_t* options) {
uint64_t rocksdb_backupable_db_options_get_restore_rate_limit(
rocksdb_backupable_db_options_t* options) {
return options->rep.restore_rate_limit;
}
void rocksdb_backup_engine_options_set_max_background_operations(
rocksdb_backup_engine_options_t* options, int val) {
void rocksdb_backupable_db_options_set_max_background_operations(
rocksdb_backupable_db_options_t* options, int val) {
options->rep.max_background_operations = val;
}
int rocksdb_backup_engine_options_get_max_background_operations(
rocksdb_backup_engine_options_t* options) {
int rocksdb_backupable_db_options_get_max_background_operations(
rocksdb_backupable_db_options_t* options) {
return options->rep.max_background_operations;
}
void rocksdb_backup_engine_options_set_callback_trigger_interval_size(
rocksdb_backup_engine_options_t* options, uint64_t size) {
void rocksdb_backupable_db_options_set_callback_trigger_interval_size(
rocksdb_backupable_db_options_t* options, uint64_t size) {
options->rep.callback_trigger_interval_size = size;
}
uint64_t rocksdb_backup_engine_options_get_callback_trigger_interval_size(
rocksdb_backup_engine_options_t* options) {
uint64_t rocksdb_backupable_db_options_get_callback_trigger_interval_size(
rocksdb_backupable_db_options_t* options) {
return options->rep.callback_trigger_interval_size;
}
void rocksdb_backup_engine_options_set_max_valid_backups_to_open(
rocksdb_backup_engine_options_t* options, int val) {
void rocksdb_backupable_db_options_set_max_valid_backups_to_open(
rocksdb_backupable_db_options_t* options, int val) {
options->rep.max_valid_backups_to_open = val;
}
int rocksdb_backup_engine_options_get_max_valid_backups_to_open(
rocksdb_backup_engine_options_t* options) {
int rocksdb_backupable_db_options_get_max_valid_backups_to_open(
rocksdb_backupable_db_options_t* options) {
return options->rep.max_valid_backups_to_open;
}
void rocksdb_backup_engine_options_set_share_files_with_checksum_naming(
rocksdb_backup_engine_options_t* options, int val) {
void rocksdb_backupable_db_options_set_share_files_with_checksum_naming(
rocksdb_backupable_db_options_t* options, int val) {
options->rep.share_files_with_checksum_naming =
static_cast<BackupEngineOptions::ShareFilesNaming>(val);
static_cast<BackupableDBOptions::ShareFilesNaming>(val);
}
int rocksdb_backup_engine_options_get_share_files_with_checksum_naming(
rocksdb_backup_engine_options_t* options) {
int rocksdb_backupable_db_options_get_share_files_with_checksum_naming(
rocksdb_backupable_db_options_t* options) {
return static_cast<int>(options->rep.share_files_with_checksum_naming);
}
void rocksdb_backup_engine_options_destroy(
rocksdb_backup_engine_options_t* options) {
void rocksdb_backupable_db_options_destroy(
rocksdb_backupable_db_options_t* options) {
delete options;
}
@ -1163,43 +1199,6 @@ void rocksdb_multi_get_cf(
}
}
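// Batched MultiGet for the C API. On success, values[i] owns a pinnable
// slice that the caller must free with rocksdb_pinnableslice_destroy;
// NotFound yields values[i] == NULL with errs[i] == NULL, while any other
// error is surfaced as a strdup'ed message in errs[i].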
void rocksdb_batched_multi_get_cf(rocksdb_t* db,
const rocksdb_readoptions_t* options,
rocksdb_column_family_handle_t* column_family,
size_t num_keys, const char* const* keys_list,
const size_t* keys_list_sizes,
rocksdb_pinnableslice_t** values, char** errs,
const bool sorted_input) {
Slice* key_slices = new Slice[num_keys];
PinnableSlice* value_slices = new PinnableSlice[num_keys];
Status* statuses = new Status[num_keys];
for (size_t i = 0; i < num_keys; ++i) {
key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
}
db->rep->MultiGet(options->rep, column_family->rep, num_keys, key_slices,
value_slices, statuses, sorted_input);
for (size_t i = 0; i < num_keys; ++i) {
if (statuses[i].ok()) {
values[i] = new (rocksdb_pinnableslice_t);
values[i]->rep = std::move(value_slices[i]);
errs[i] = nullptr;
} else {
values[i] = nullptr;
if (!statuses[i].IsNotFound()) {
errs[i] = strdup(statuses[i].ToString().c_str());
} else {
errs[i] = nullptr;
}
}
}
delete[] key_slices;
delete[] value_slices;
delete[] statuses;
}
unsigned char rocksdb_key_may_exist(rocksdb_t* db,
const rocksdb_readoptions_t* options,
const char* key, size_t key_len,
@ -2334,6 +2333,11 @@ void rocksdb_block_based_options_set_data_block_hash_ratio(
options->rep.data_block_hash_table_util_ratio = v;
}
void rocksdb_block_based_options_set_hash_index_allow_collision(
rocksdb_block_based_table_options_t* options, unsigned char v) {
options->rep.hash_index_allow_collision = v;
}
void rocksdb_block_based_options_set_cache_index_and_filter_blocks(
rocksdb_block_based_table_options_t* options, unsigned char v) {
options->rep.cache_index_and_filter_blocks = v;
@ -2747,25 +2751,6 @@ double rocksdb_options_get_blob_gc_age_cutoff(rocksdb_options_t* opt) {
return opt->rep.blob_garbage_collection_age_cutoff;
}
void rocksdb_options_set_blob_gc_force_threshold(rocksdb_options_t* opt,
double val) {
opt->rep.blob_garbage_collection_force_threshold = val;
}
double rocksdb_options_get_blob_gc_force_threshold(rocksdb_options_t* opt) {
return opt->rep.blob_garbage_collection_force_threshold;
}
void rocksdb_options_set_blob_compaction_readahead_size(rocksdb_options_t* opt,
uint64_t val) {
opt->rep.blob_compaction_readahead_size = val;
}
uint64_t rocksdb_options_get_blob_compaction_readahead_size(
rocksdb_options_t* opt) {
return opt->rep.blob_compaction_readahead_size;
}
void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
opt->rep.num_levels = n;
}
@ -2802,6 +2787,9 @@ int rocksdb_options_get_level0_stop_writes_trigger(rocksdb_options_t* opt) {
return opt->rep.level0_stop_writes_trigger;
}
void rocksdb_options_set_max_mem_compaction_level(rocksdb_options_t* /*opt*/,
int /*n*/) {}
void rocksdb_options_set_wal_recovery_mode(rocksdb_options_t* opt, int mode) {
opt->rep.wal_recovery_mode = static_cast<WALRecoveryMode>(mode);
}
@ -2827,7 +2815,7 @@ int rocksdb_options_get_bottommost_compression(rocksdb_options_t* opt) {
}
void rocksdb_options_set_compression_per_level(rocksdb_options_t* opt,
const int* level_values,
int* level_values,
size_t num_levels) {
opt->rep.compression_per_level.resize(num_levels);
for (size_t i = 0; i < num_levels; ++i) {
@ -2952,6 +2940,10 @@ size_t rocksdb_options_get_manifest_preallocation_size(rocksdb_options_t* opt) {
return opt->rep.manifest_preallocation_size;
}
// noop
void rocksdb_options_set_purge_redundant_kvs_while_flush(
rocksdb_options_t* /*opt*/, unsigned char /*v*/) {}
void rocksdb_options_set_use_direct_reads(rocksdb_options_t* opt,
unsigned char v) {
opt->rep.use_direct_reads = v;
@ -2998,6 +2990,16 @@ unsigned char rocksdb_options_get_is_fd_close_on_exec(rocksdb_options_t* opt) {
return opt->rep.is_fd_close_on_exec;
}
void rocksdb_options_set_skip_log_error_on_recovery(
rocksdb_options_t* opt, unsigned char v) {
opt->rep.skip_log_error_on_recovery = v;
}
unsigned char rocksdb_options_get_skip_log_error_on_recovery(
rocksdb_options_t* opt) {
return opt->rep.skip_log_error_on_recovery;
}
void rocksdb_options_set_stats_dump_period_sec(
rocksdb_options_t* opt, unsigned int v) {
opt->rep.stats_dump_period_sec = v;
@ -3209,6 +3211,15 @@ int rocksdb_options_get_max_background_compactions(rocksdb_options_t* opt) {
return opt->rep.max_background_compactions;
}
void rocksdb_options_set_base_background_compactions(rocksdb_options_t* opt,
int n) {
opt->rep.base_background_compactions = n;
}
int rocksdb_options_get_base_background_compactions(rocksdb_options_t* opt) {
return opt->rep.base_background_compactions;
}
void rocksdb_options_set_max_background_flushes(rocksdb_options_t* opt, int n) {
opt->rep.max_background_flushes = n;
}
@ -3250,6 +3261,22 @@ size_t rocksdb_options_get_recycle_log_file_num(rocksdb_options_t* opt) {
return opt->rep.recycle_log_file_num;
}
void rocksdb_options_set_soft_rate_limit(rocksdb_options_t* opt, double v) {
opt->rep.soft_rate_limit = v;
}
double rocksdb_options_get_soft_rate_limit(rocksdb_options_t* opt) {
return opt->rep.soft_rate_limit;
}
void rocksdb_options_set_hard_rate_limit(rocksdb_options_t* opt, double v) {
opt->rep.hard_rate_limit = v;
}
double rocksdb_options_get_hard_rate_limit(rocksdb_options_t* opt) {
return opt->rep.hard_rate_limit;
}
void rocksdb_options_set_soft_pending_compaction_bytes_limit(rocksdb_options_t* opt, size_t v) {
opt->rep.soft_pending_compaction_bytes_limit = v;
}
@ -3268,6 +3295,16 @@ size_t rocksdb_options_get_hard_pending_compaction_bytes_limit(
return opt->rep.hard_pending_compaction_bytes_limit;
}
void rocksdb_options_set_rate_limit_delay_max_milliseconds(
rocksdb_options_t* opt, unsigned int v) {
opt->rep.rate_limit_delay_max_milliseconds = v;
}
unsigned int rocksdb_options_get_rate_limit_delay_max_milliseconds(
rocksdb_options_t* opt) {
return opt->rep.rate_limit_delay_max_milliseconds;
}
void rocksdb_options_set_max_manifest_file_size(
rocksdb_options_t* opt, size_t v) {
opt->rep.max_manifest_file_size = v;
@ -3286,6 +3323,11 @@ int rocksdb_options_get_table_cache_numshardbits(rocksdb_options_t* opt) {
return opt->rep.table_cache_numshardbits;
}
void rocksdb_options_set_table_cache_remove_scan_count_limit(
rocksdb_options_t* /*opt*/, int /*v*/) {
// this option is deprecated
}
void rocksdb_options_set_arena_block_size(
rocksdb_options_t* opt, size_t v) {
opt->rep.arena_block_size = v;
@ -3476,14 +3518,6 @@ unsigned char rocksdb_options_get_manual_wal_flush(rocksdb_options_t* opt) {
return opt->rep.manual_wal_flush;
}
void rocksdb_options_set_wal_compression(rocksdb_options_t* opt, int val) {
opt->rep.wal_compression = static_cast<CompressionType>(val);
}
int rocksdb_options_get_wal_compression(rocksdb_options_t* opt) {
return opt->rep.wal_compression;
}
rocksdb_ratelimiter_t* rocksdb_ratelimiter_create(
int64_t rate_bytes_per_sec,
int64_t refill_period_us,
@ -3776,6 +3810,32 @@ void rocksdb_comparator_destroy(rocksdb_comparator_t* cmp) {
delete cmp;
}
rocksdb_filterpolicy_t* rocksdb_filterpolicy_create(
void* state,
void (*destructor)(void*),
char* (*create_filter)(
void*,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length),
unsigned char (*key_may_match)(
void*,
const char* key, size_t length,
const char* filter, size_t filter_length),
void (*delete_filter)(
void*,
const char* filter, size_t filter_length),
const char* (*name)(void*)) {
rocksdb_filterpolicy_t* result = new rocksdb_filterpolicy_t;
result->state_ = state;
result->destructor_ = destructor;
result->create_ = create_filter;
result->key_match_ = key_may_match;
result->delete_filter_ = delete_filter;
result->name_ = name;
return result;
}
void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t* filter) {
delete filter;
}
@ -3789,8 +3849,12 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format(
const FilterPolicy* rep_;
~Wrapper() override { delete rep_; }
const char* Name() const override { return rep_->Name(); }
const char* CompatibilityName() const override {
return rep_->CompatibilityName();
void CreateFilter(const Slice* keys, int n,
std::string* dst) const override {
return rep_->CreateFilter(keys, n, dst);
}
bool KeyMayMatch(const Slice& key, const Slice& filter) const override {
return rep_->KeyMayMatch(key, filter);
}
// No need to override GetFilterBitsBuilder if this one is overridden
ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext(
@ -3807,6 +3871,7 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format(
Wrapper* wrapper = new Wrapper;
wrapper->rep_ = NewBloomFilterPolicy(bits_per_key, original_format);
wrapper->state_ = nullptr;
wrapper->delete_filter_ = nullptr;
wrapper->destructor_ = &Wrapper::DoNothing;
return wrapper;
}
@ -3829,8 +3894,12 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format(
const FilterPolicy* rep_;
~Wrapper() override { delete rep_; }
const char* Name() const override { return rep_->Name(); }
const char* CompatibilityName() const override {
return rep_->CompatibilityName();
void CreateFilter(const Slice* keys, int n,
std::string* dst) const override {
return rep_->CreateFilter(keys, n, dst);
}
bool KeyMayMatch(const Slice& key, const Slice& filter) const override {
return rep_->KeyMayMatch(key, filter);
}
ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext(
const ROCKSDB_NAMESPACE::FilterBuildingContext& context)
@ -3847,6 +3916,7 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format(
wrapper->rep_ =
NewRibbonFilterPolicy(bloom_equivalent_bits_per_key, bloom_before_level);
wrapper->state_ = nullptr;
wrapper->delete_filter_ = nullptr;
wrapper->destructor_ = &Wrapper::DoNothing;
return wrapper;
}
@ -4230,14 +4300,6 @@ rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity) {
return c;
}
rocksdb_cache_t* rocksdb_cache_create_lru_with_strict_capacity_limit(
size_t capacity) {
rocksdb_cache_t* c = new rocksdb_cache_t;
c->rep = NewLRUCache(capacity);
c->rep->SetStrictCapacityLimit(true);
return c;
}
rocksdb_cache_t* rocksdb_cache_create_lru_opts(
rocksdb_lru_cache_options_t* opt) {
rocksdb_cache_t* c = new rocksdb_cache_t;
@ -4367,11 +4429,6 @@ rocksdb_sstfilewriter_t* rocksdb_sstfilewriter_create(
return writer;
}
void rocksdb_create_dir_if_missing(rocksdb_env_t* env, const char* path,
char** errptr) {
SaveError(errptr, env->rep->CreateDirIfMissing(std::string(path)));
}
rocksdb_sstfilewriter_t* rocksdb_sstfilewriter_create_with_comparator(
const rocksdb_envoptions_t* env, const rocksdb_options_t* io_options,
const rocksdb_comparator_t* /*comparator*/) {
@ -4519,11 +4576,10 @@ void rocksdb_slicetransform_destroy(rocksdb_slicetransform_t* st) {
delete st;
}
struct SliceTransformWrapper : public rocksdb_slicetransform_t {
struct Wrapper : public rocksdb_slicetransform_t {
const SliceTransform* rep_;
~SliceTransformWrapper() override { delete rep_; }
~Wrapper() override { delete rep_; }
const char* Name() const override { return rep_->Name(); }
std::string GetId() const override { return rep_->GetId(); }
Slice Transform(const Slice& src) const override {
return rep_->Transform(src);
}
@ -4535,18 +4591,18 @@ struct SliceTransformWrapper : public rocksdb_slicetransform_t {
};
rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix(size_t prefixLen) {
SliceTransformWrapper* wrapper = new SliceTransformWrapper;
Wrapper* wrapper = new Wrapper;
wrapper->rep_ = ROCKSDB_NAMESPACE::NewFixedPrefixTransform(prefixLen);
wrapper->state_ = nullptr;
wrapper->destructor_ = &SliceTransformWrapper::DoNothing;
wrapper->destructor_ = &Wrapper::DoNothing;
return wrapper;
}
rocksdb_slicetransform_t* rocksdb_slicetransform_create_noop() {
SliceTransformWrapper* wrapper = new SliceTransformWrapper;
Wrapper* wrapper = new Wrapper;
wrapper->rep_ = ROCKSDB_NAMESPACE::NewNoopTransform();
wrapper->state_ = nullptr;
wrapper->destructor_ = &SliceTransformWrapper::DoNothing;
wrapper->destructor_ = &Wrapper::DoNothing;
return wrapper;
}
@ -4662,11 +4718,6 @@ int rocksdb_livefiles_count(
return static_cast<int>(lf->rep.size());
}
const char* rocksdb_livefiles_column_family_name(const rocksdb_livefiles_t* lf,
int index) {
return lf->rep[index].column_family_name.c_str();
}
const char* rocksdb_livefiles_name(
const rocksdb_livefiles_t* lf,
int index) {

@ -7,13 +7,12 @@
#ifndef ROCKSDB_LITE // Lite does not support C API
#include <assert.h>
#include "rocksdb/c.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include "rocksdb/c.h"
#ifndef OS_WIN
#include <unistd.h>
#endif
@ -90,8 +89,10 @@ static void CheckEqual(const char* expected, const char* v, size_t n) {
// ok
return;
} else {
fprintf(stderr, "%s: expected '%s', got '%s'\n", phase,
(expected ? expected : "(null)"), (v ? v : "(null)"));
fprintf(stderr, "%s: expected '%s', got '%s'\n",
phase,
(expected ? expected : "(null)"),
(v ? v : "(null"));
abort();
}
}
@ -225,6 +226,39 @@ static const char* CmpName(void* arg) {
return "foo";
}
// Custom filter policy
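// fake_filter_result controls what FilterKeyMatch reports below: 1 means
// the filter claims the key may be present, 0 means definitely absent.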
static unsigned char fake_filter_result = 1;
static void FilterDestroy(void* arg) { (void)arg; }
static const char* FilterName(void* arg) {
(void)arg;
return "TestFilter";
}
static char* FilterCreate(
void* arg,
const char* const* key_array, const size_t* key_length_array,
int num_keys,
size_t* filter_length) {
(void)arg;
(void)key_array;
(void)key_length_array;
(void)num_keys;
*filter_length = 4;
char* result = malloc(4);
memcpy(result, "fake", 4);
return result;
}
static unsigned char FilterKeyMatch(
void* arg,
const char* key, size_t length,
const char* filter, size_t filter_length) {
(void)arg;
(void)key;
(void)length;
CheckCondition(filter_length == 4);
CheckCondition(memcmp(filter, "fake", 4) == 0);
return fake_filter_result;
}
// Custom compaction filter
static void CFilterDestroy(void* arg) { (void)arg; }
static const char* CFilterName(void* arg) {
@ -446,10 +480,6 @@ int main(int argc, char** argv) {
cmp = rocksdb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName);
dbpath = rocksdb_dbpath_create(dbpathname, 1024 * 1024);
env = rocksdb_create_default_env();
rocksdb_create_dir_if_missing(env, GetTempDir(), &err);
CheckNoError(err);
cache = rocksdb_cache_create_lru(100000);
options = rocksdb_options_create();
@ -460,6 +490,7 @@ int main(int argc, char** argv) {
rocksdb_options_set_write_buffer_size(options, 100000);
rocksdb_options_set_paranoid_checks(options, 1);
rocksdb_options_set_max_open_files(options, 10);
rocksdb_options_set_base_background_compactions(options, 1);
table_options = rocksdb_block_based_options_create();
rocksdb_block_based_options_set_block_cache(table_options, cache);
@ -988,36 +1019,7 @@ int main(int argc, char** argv) {
CheckGet(db, roptions, "foo", NULL);
rocksdb_release_snapshot(db, snap);
}
StartPhase("snapshot_with_memtable_inplace_update");
{
rocksdb_close(db);
const rocksdb_snapshot_t* snap = NULL;
const char* s_key = "foo_snap";
const char* value1 = "hello_s1";
const char* value2 = "hello_s2";
rocksdb_options_set_allow_concurrent_memtable_write(options, 0);
rocksdb_options_set_inplace_update_support(options, 1);
rocksdb_options_set_error_if_exists(options, 0);
db = rocksdb_open(options, dbname, &err);
CheckNoError(err);
rocksdb_put(db, woptions, s_key, 8, value1, 8, &err);
snap = rocksdb_create_snapshot(db);
assert(snap != NULL);
rocksdb_put(db, woptions, s_key, 8, value2, 8, &err);
CheckNoError(err);
rocksdb_readoptions_set_snapshot(roptions, snap);
CheckGet(db, roptions, "foo", NULL);
// The snapshot is not honored because inplace update support is enabled.
CheckGet(db, roptions, s_key, value2);
// restore the data and options
rocksdb_delete(db, woptions, s_key, 8, &err);
CheckGet(db, roptions, s_key, NULL);
rocksdb_release_snapshot(db, snap);
rocksdb_readoptions_set_snapshot(roptions, NULL);
rocksdb_options_set_inplace_update_support(options, 0);
rocksdb_options_set_allow_concurrent_memtable_write(options, 1);
rocksdb_options_set_error_if_exists(options, 1);
}
StartPhase("repair");
{
// If we do not compact here, then the lazy deletion of
@ -1041,15 +1043,18 @@ int main(int argc, char** argv) {
}
StartPhase("filter");
for (run = 1; run <= 4; run++) {
// run=0 uses custom filter (not currently supported)
for (run = 0; run <= 4; run++) {
// run=0 uses custom filter
// run=1 uses old block-based bloom filter
// run=2 uses full bloom filter
// run=3 uses Ribbon
// run=4 uses Ribbon-Bloom hybrid configuration
CheckNoError(err);
rocksdb_filterpolicy_t* policy;
if (run == 1) {
if (run == 0) {
policy = rocksdb_filterpolicy_create(NULL, FilterDestroy, FilterCreate,
FilterKeyMatch, NULL, FilterName);
} else if (run == 1) {
policy = rocksdb_filterpolicy_create_bloom(8.0);
} else if (run == 2) {
policy = rocksdb_filterpolicy_create_bloom_full(8.0);
@ -1084,8 +1089,19 @@ int main(int argc, char** argv) {
}
rocksdb_compact_range(db, NULL, 0, NULL, 0);
fake_filter_result = 1;
CheckGet(db, roptions, "foo", "foovalue");
CheckGet(db, roptions, "bar", "barvalue");
if (run == 0) {
// Must not find value when custom filter returns false
fake_filter_result = 0;
CheckGet(db, roptions, "foo", NULL);
CheckGet(db, roptions, "bar", NULL);
fake_filter_result = 1;
CheckGet(db, roptions, "foo", "foovalue");
CheckGet(db, roptions, "bar", "barvalue");
}
{
// Query some keys that were never added, to identify the Bloom filter
// implementation
@ -1098,6 +1114,7 @@ int main(int argc, char** argv) {
int i;
char keybuf[100];
for (i = 0; i < keys_to_query; i++) {
fake_filter_result = i % 2;
snprintf(keybuf, sizeof(keybuf), "no%020d", i);
CheckGet(db, roptions, keybuf, NULL);
}
@ -1107,10 +1124,10 @@ int main(int argc, char** argv) {
if (run == 0) {
// The fake filter returns true for half of the queried keys, so half are hits
CheckCondition(hits == keys_to_query / 2);
} else if (run == 1 || run == 2 || run == 4) {
// For run == 1, block-based Bloom is no longer available in public
// API; attempting to enable it enables full Bloom instead.
//
} else if (run == 1) {
// Essentially a fingerprint of the block-based Bloom schema
CheckCondition(hits == 241);
} else if (run == 2 || run == 4) {
// Essentially a fingerprint of full Bloom schema, format_version=5
CheckCondition(hits == 188);
} else {
@ -1260,18 +1277,15 @@ int main(int argc, char** argv) {
rocksdb_writebatch_clear(wb);
rocksdb_writebatch_put_cf(wb, handles[1], "bar", 3, "b", 1);
rocksdb_writebatch_put_cf(wb, handles[1], "box", 3, "c", 1);
rocksdb_writebatch_put_cf(wb, handles[1], "buff", 4, "rocksdb", 7);
rocksdb_writebatch_delete_cf(wb, handles[1], "bar", 3);
rocksdb_write(db, woptions, wb, &err);
CheckNoError(err);
CheckGetCF(db, roptions, handles[1], "baz", NULL);
CheckGetCF(db, roptions, handles[1], "bar", NULL);
CheckGetCF(db, roptions, handles[1], "box", "c");
CheckGetCF(db, roptions, handles[1], "buff", "rocksdb");
CheckPinGetCF(db, roptions, handles[1], "baz", NULL);
CheckPinGetCF(db, roptions, handles[1], "bar", NULL);
CheckPinGetCF(db, roptions, handles[1], "box", "c");
CheckPinGetCF(db, roptions, handles[1], "buff", "rocksdb");
rocksdb_writebatch_destroy(wb);
rocksdb_flush_wal(db, 1, &err);
@ -1302,26 +1316,6 @@ int main(int argc, char** argv) {
Free(&vals[i]);
}
{
const char* batched_keys[4] = {"box", "buff", "barfooxx", "box"};
const size_t batched_keys_sizes[4] = {3, 4, 8, 3};
const char* expected_value[4] = {"c", "rocksdb", NULL, "c"};
char* batched_errs[4];
rocksdb_pinnableslice_t* pvals[4];
rocksdb_batched_multi_get_cf(db, roptions, handles[1], 4, batched_keys,
batched_keys_sizes, pvals, batched_errs,
false);
const char* val;
size_t val_len;
for (i = 0; i < 4; ++i) {
val = rocksdb_pinnableslice_value(pvals[i], &val_len);
CheckNoError(batched_errs[i]);
CheckEqual(expected_value[i], val, val_len);
rocksdb_pinnableslice_destroy(pvals[i]);
}
}
{
unsigned char value_found = 0;
@ -1353,7 +1347,7 @@ int main(int argc, char** argv) {
for (i = 0; rocksdb_iter_valid(iter) != 0; rocksdb_iter_next(iter)) {
i++;
}
CheckCondition(i == 4);
CheckCondition(i == 3);
rocksdb_iter_get_error(iter, &err);
CheckNoError(err);
rocksdb_iter_destroy(iter);
@ -1377,7 +1371,7 @@ int main(int argc, char** argv) {
for (i = 0; rocksdb_iter_valid(iter) != 0; rocksdb_iter_next(iter)) {
i++;
}
CheckCondition(i == 4);
CheckCondition(i == 3);
rocksdb_iter_get_error(iter, &err);
CheckNoError(err);
rocksdb_iter_destroy(iter);
@ -1619,6 +1613,9 @@ int main(int argc, char** argv) {
rocksdb_options_set_max_background_compactions(o, 3);
CheckCondition(3 == rocksdb_options_get_max_background_compactions(o));
rocksdb_options_set_base_background_compactions(o, 4);
CheckCondition(4 == rocksdb_options_get_base_background_compactions(o));
rocksdb_options_set_max_background_flushes(o, 5);
CheckCondition(5 == rocksdb_options_get_max_background_flushes(o));
@ -1634,6 +1631,12 @@ int main(int argc, char** argv) {
rocksdb_options_set_recycle_log_file_num(o, 9);
CheckCondition(9 == rocksdb_options_get_recycle_log_file_num(o));
rocksdb_options_set_soft_rate_limit(o, 2.0);
CheckCondition(2.0 == rocksdb_options_get_soft_rate_limit(o));
rocksdb_options_set_hard_rate_limit(o, 4.0);
CheckCondition(4.0 == rocksdb_options_get_hard_rate_limit(o));
rocksdb_options_set_soft_pending_compaction_bytes_limit(o, 10);
CheckCondition(10 ==
rocksdb_options_get_soft_pending_compaction_bytes_limit(o));
@ -1642,6 +1645,10 @@ int main(int argc, char** argv) {
CheckCondition(11 ==
rocksdb_options_get_hard_pending_compaction_bytes_limit(o));
rocksdb_options_set_rate_limit_delay_max_milliseconds(o, 1);
CheckCondition(1 ==
rocksdb_options_get_rate_limit_delay_max_milliseconds(o));
rocksdb_options_set_max_manifest_file_size(o, 12);
CheckCondition(12 == rocksdb_options_get_max_manifest_file_size(o));
@ -1679,6 +1686,9 @@ int main(int argc, char** argv) {
rocksdb_options_set_is_fd_close_on_exec(o, 1);
CheckCondition(1 == rocksdb_options_get_is_fd_close_on_exec(o));
rocksdb_options_set_skip_log_error_on_recovery(o, 1);
CheckCondition(1 == rocksdb_options_get_skip_log_error_on_recovery(o));
rocksdb_options_set_stats_dump_period_sec(o, 18);
CheckCondition(18 == rocksdb_options_get_stats_dump_period_sec(o));
@ -1767,9 +1777,6 @@ int main(int argc, char** argv) {
rocksdb_options_set_manual_wal_flush(o, 1);
CheckCondition(1 == rocksdb_options_get_manual_wal_flush(o));
rocksdb_options_set_wal_compression(o, 1);
CheckCondition(1 == rocksdb_options_get_wal_compression(o));
/* Blob Options */
rocksdb_options_set_enable_blob_files(o, 1);
CheckCondition(1 == rocksdb_options_get_enable_blob_files(o));
@ -1786,15 +1793,8 @@ int main(int argc, char** argv) {
rocksdb_options_set_enable_blob_gc(o, 1);
CheckCondition(1 == rocksdb_options_get_enable_blob_gc(o));
rocksdb_options_set_blob_gc_age_cutoff(o, 0.5);
CheckCondition(0.5 == rocksdb_options_get_blob_gc_age_cutoff(o));
rocksdb_options_set_blob_gc_force_threshold(o, 0.75);
CheckCondition(0.75 == rocksdb_options_get_blob_gc_force_threshold(o));
rocksdb_options_set_blob_compaction_readahead_size(o, 262144);
CheckCondition(262144 ==
rocksdb_options_get_blob_compaction_readahead_size(o));
rocksdb_options_set_blob_gc_age_cutoff(o, 0.75);
CheckCondition(0.75 == rocksdb_options_get_blob_gc_age_cutoff(o));
// Create a copy that should be equal to the original.
rocksdb_options_t* copy;
@ -1841,15 +1841,20 @@ int main(int argc, char** argv) {
CheckCondition(123456 == rocksdb_options_get_max_subcompactions(copy));
CheckCondition(2 == rocksdb_options_get_max_background_jobs(copy));
CheckCondition(3 == rocksdb_options_get_max_background_compactions(copy));
CheckCondition(4 == rocksdb_options_get_base_background_compactions(copy));
CheckCondition(5 == rocksdb_options_get_max_background_flushes(copy));
CheckCondition(6 == rocksdb_options_get_max_log_file_size(copy));
CheckCondition(7 == rocksdb_options_get_log_file_time_to_roll(copy));
CheckCondition(8 == rocksdb_options_get_keep_log_file_num(copy));
CheckCondition(9 == rocksdb_options_get_recycle_log_file_num(copy));
CheckCondition(2.0 == rocksdb_options_get_soft_rate_limit(copy));
CheckCondition(4.0 == rocksdb_options_get_hard_rate_limit(copy));
CheckCondition(
10 == rocksdb_options_get_soft_pending_compaction_bytes_limit(copy));
CheckCondition(
11 == rocksdb_options_get_hard_pending_compaction_bytes_limit(copy));
CheckCondition(1 ==
rocksdb_options_get_rate_limit_delay_max_milliseconds(copy));
CheckCondition(12 == rocksdb_options_get_max_manifest_file_size(copy));
CheckCondition(13 == rocksdb_options_get_table_cache_numshardbits(copy));
CheckCondition(14 == rocksdb_options_get_arena_block_size(copy));
@ -1863,6 +1868,7 @@ int main(int argc, char** argv) {
CheckCondition(
1 == rocksdb_options_get_use_direct_io_for_flush_and_compaction(copy));
CheckCondition(1 == rocksdb_options_get_is_fd_close_on_exec(copy));
CheckCondition(1 == rocksdb_options_get_skip_log_error_on_recovery(copy));
CheckCondition(18 == rocksdb_options_get_stats_dump_period_sec(copy));
CheckCondition(5 == rocksdb_options_get_stats_persist_period_sec(copy));
CheckCondition(1 == rocksdb_options_get_advise_random_on_open(copy));
@ -2043,6 +2049,10 @@ int main(int argc, char** argv) {
CheckCondition(13 == rocksdb_options_get_max_background_compactions(copy));
CheckCondition(3 == rocksdb_options_get_max_background_compactions(o));
rocksdb_options_set_base_background_compactions(copy, 14);
CheckCondition(14 == rocksdb_options_get_base_background_compactions(copy));
CheckCondition(4 == rocksdb_options_get_base_background_compactions(o));
rocksdb_options_set_max_background_flushes(copy, 15);
CheckCondition(15 == rocksdb_options_get_max_background_flushes(copy));
CheckCondition(5 == rocksdb_options_get_max_background_flushes(o));
@ -2063,6 +2073,14 @@ int main(int argc, char** argv) {
CheckCondition(19 == rocksdb_options_get_recycle_log_file_num(copy));
CheckCondition(9 == rocksdb_options_get_recycle_log_file_num(o));
rocksdb_options_set_soft_rate_limit(copy, 4.0);
CheckCondition(4.0 == rocksdb_options_get_soft_rate_limit(copy));
CheckCondition(2.0 == rocksdb_options_get_soft_rate_limit(o));
rocksdb_options_set_hard_rate_limit(copy, 2.0);
CheckCondition(2.0 == rocksdb_options_get_hard_rate_limit(copy));
CheckCondition(4.0 == rocksdb_options_get_hard_rate_limit(o));
rocksdb_options_set_soft_pending_compaction_bytes_limit(copy, 110);
CheckCondition(
110 == rocksdb_options_get_soft_pending_compaction_bytes_limit(copy));
@@ -2075,6 +2093,12 @@ int main(int argc, char** argv) {
CheckCondition(11 ==
rocksdb_options_get_hard_pending_compaction_bytes_limit(o));
rocksdb_options_set_rate_limit_delay_max_milliseconds(copy, 0);
CheckCondition(0 ==
rocksdb_options_get_rate_limit_delay_max_milliseconds(copy));
CheckCondition(1 ==
rocksdb_options_get_rate_limit_delay_max_milliseconds(o));
rocksdb_options_set_max_manifest_file_size(copy, 112);
CheckCondition(112 == rocksdb_options_get_max_manifest_file_size(copy));
CheckCondition(12 == rocksdb_options_get_max_manifest_file_size(o));
@@ -2126,6 +2150,10 @@ int main(int argc, char** argv) {
CheckCondition(0 == rocksdb_options_get_is_fd_close_on_exec(copy));
CheckCondition(1 == rocksdb_options_get_is_fd_close_on_exec(o));
rocksdb_options_set_skip_log_error_on_recovery(copy, 0);
CheckCondition(0 == rocksdb_options_get_skip_log_error_on_recovery(copy));
CheckCondition(1 == rocksdb_options_get_skip_log_error_on_recovery(o));
rocksdb_options_set_stats_dump_period_sec(copy, 218);
CheckCondition(218 == rocksdb_options_get_stats_dump_period_sec(copy));
CheckCondition(18 == rocksdb_options_get_stats_dump_period_sec(o));
@@ -2471,53 +2499,53 @@ int main(int argc, char** argv) {
rocksdb_fifo_compaction_options_destroy(fco);
}
StartPhase("backup_engine_option");
StartPhase("backupable_db_option");
{
rocksdb_backup_engine_options_t* bdo;
bdo = rocksdb_backup_engine_options_create("path");
rocksdb_backupable_db_options_t* bdo;
bdo = rocksdb_backupable_db_options_create("path");
rocksdb_backup_engine_options_set_share_table_files(bdo, 1);
rocksdb_backupable_db_options_set_share_table_files(bdo, 1);
CheckCondition(1 ==
rocksdb_backup_engine_options_get_share_table_files(bdo));
rocksdb_backupable_db_options_get_share_table_files(bdo));
rocksdb_backup_engine_options_set_sync(bdo, 1);
CheckCondition(1 == rocksdb_backup_engine_options_get_sync(bdo));
rocksdb_backupable_db_options_set_sync(bdo, 1);
CheckCondition(1 == rocksdb_backupable_db_options_get_sync(bdo));
rocksdb_backup_engine_options_set_destroy_old_data(bdo, 1);
rocksdb_backupable_db_options_set_destroy_old_data(bdo, 1);
CheckCondition(1 ==
rocksdb_backup_engine_options_get_destroy_old_data(bdo));
rocksdb_backupable_db_options_get_destroy_old_data(bdo));
rocksdb_backup_engine_options_set_backup_log_files(bdo, 1);
rocksdb_backupable_db_options_set_backup_log_files(bdo, 1);
CheckCondition(1 ==
rocksdb_backup_engine_options_get_backup_log_files(bdo));
rocksdb_backupable_db_options_get_backup_log_files(bdo));
rocksdb_backup_engine_options_set_backup_rate_limit(bdo, 123);
rocksdb_backupable_db_options_set_backup_rate_limit(bdo, 123);
CheckCondition(123 ==
rocksdb_backup_engine_options_get_backup_rate_limit(bdo));
rocksdb_backupable_db_options_get_backup_rate_limit(bdo));
rocksdb_backup_engine_options_set_restore_rate_limit(bdo, 37);
rocksdb_backupable_db_options_set_restore_rate_limit(bdo, 37);
CheckCondition(37 ==
rocksdb_backup_engine_options_get_restore_rate_limit(bdo));
rocksdb_backupable_db_options_get_restore_rate_limit(bdo));
rocksdb_backup_engine_options_set_max_background_operations(bdo, 20);
rocksdb_backupable_db_options_set_max_background_operations(bdo, 20);
CheckCondition(
20 == rocksdb_backup_engine_options_get_max_background_operations(bdo));
20 == rocksdb_backupable_db_options_get_max_background_operations(bdo));
rocksdb_backup_engine_options_set_callback_trigger_interval_size(bdo, 9000);
rocksdb_backupable_db_options_set_callback_trigger_interval_size(bdo, 9000);
CheckCondition(
9000 ==
rocksdb_backup_engine_options_get_callback_trigger_interval_size(bdo));
rocksdb_backupable_db_options_get_callback_trigger_interval_size(bdo));
rocksdb_backup_engine_options_set_max_valid_backups_to_open(bdo, 40);
rocksdb_backupable_db_options_set_max_valid_backups_to_open(bdo, 40);
CheckCondition(
40 == rocksdb_backup_engine_options_get_max_valid_backups_to_open(bdo));
40 == rocksdb_backupable_db_options_get_max_valid_backups_to_open(bdo));
rocksdb_backup_engine_options_set_share_files_with_checksum_naming(bdo, 2);
rocksdb_backupable_db_options_set_share_files_with_checksum_naming(bdo, 2);
CheckCondition(
2 == rocksdb_backup_engine_options_get_share_files_with_checksum_naming(
2 == rocksdb_backupable_db_options_get_share_files_with_checksum_naming(
bdo));
rocksdb_backup_engine_options_destroy(bdo);
rocksdb_backupable_db_options_destroy(bdo);
}
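A minimal sketch of the renamed backup-engine options API in isolation, using only functions that appear in the hunk above; the path and the values are illustrative, not taken from the diff:

  rocksdb_backup_engine_options_t* opts =
      rocksdb_backup_engine_options_create("/tmp/rocksdb_backup");
  rocksdb_backup_engine_options_set_sync(opts, 1);
  rocksdb_backup_engine_options_set_destroy_old_data(opts, 0);
  rocksdb_backup_engine_options_set_max_background_operations(opts, 4);
  /* ... hand opts to a backup engine ... */
  rocksdb_backup_engine_options_destroy(opts);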
StartPhase("compression_options");
@@ -2921,51 +2949,6 @@ int main(int argc, char** argv) {
CheckNoError(err);
}
StartPhase("filter_with_prefix_seek");
{
rocksdb_close(db);
rocksdb_destroy_db(options, dbname, &err);
CheckNoError(err);
rocksdb_options_set_prefix_extractor(
options, rocksdb_slicetransform_create_fixed_prefix(1));
rocksdb_filterpolicy_t* filter_policy =
rocksdb_filterpolicy_create_bloom_full(8.0);
rocksdb_block_based_options_set_filter_policy(table_options, filter_policy);
rocksdb_options_set_block_based_table_factory(options, table_options);
db = rocksdb_open(options, dbname, &err);
CheckNoError(err);
int i;
for (i = 0; i < 10; ++i) {
char key = '0' + (char)i;
rocksdb_put(db, woptions, &key, 1, "", 1, &err);
CheckNoError(err);
}
// Flush to generate an L0 so that filter will be used later.
rocksdb_flushoptions_t* flush_options = rocksdb_flushoptions_create();
rocksdb_flushoptions_set_wait(flush_options, 1);
rocksdb_flush(db, flush_options, &err);
rocksdb_flushoptions_destroy(flush_options);
CheckNoError(err);
rocksdb_readoptions_t* ropts = rocksdb_readoptions_create();
rocksdb_iterator_t* iter = rocksdb_create_iterator(db, ropts);
rocksdb_iter_seek(iter, "0", 1);
int cnt = 0;
while (rocksdb_iter_valid(iter)) {
++cnt;
rocksdb_iter_next(iter);
}
CheckCondition(10 == cnt);
rocksdb_iter_destroy(iter);
rocksdb_readoptions_destroy(ropts);
}
StartPhase("cancel_all_background_work");
rocksdb_cancel_all_background_work(db, 1);

@@ -29,7 +29,7 @@
#include "db/version_set.h"
#include "db/write_controller.h"
#include "file/sst_file_manager_impl.h"
#include "logging/logging.h"
#include "memtable/hash_skiplist_rep.h"
#include "monitoring/thread_status_util.h"
#include "options/options_helper.h"
#include "port/port.h"
@@ -75,6 +75,11 @@ ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() {
bool defer_purge =
db_->immutable_db_options().avoid_unnecessary_blocking_io;
db_->PurgeObsoleteFiles(job_context, defer_purge);
if (defer_purge) {
mutex_->Lock();
db_->SchedulePurge();
mutex_->Unlock();
}
}
job_context.Clean();
}
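The added branch above only runs when purges are deferred. A minimal sketch of the option that selects this path, assuming it is set when the DB is opened (illustrative, not part of the diff):

  DBOptions db_opts;
  // Defer obsolete-file deletion to the scheduled background purge instead
  // of blocking in the ColumnFamilyHandleImpl destructor.
  db_opts.avoid_unnecessary_blocking_io = true;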
@@ -198,7 +203,7 @@ Status CheckCFPathsSupported(const DBOptions& db_options,
namespace {
const uint64_t kDefaultTtl = 0xfffffffffffffffe;
const uint64_t kDefaultPeriodicCompSecs = 0xfffffffffffffffe;
} // namespace
}; // namespace
ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options,
const ColumnFamilyOptions& src) {
@@ -206,8 +211,7 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options,
size_t clamp_max = std::conditional<
sizeof(size_t) == 4, std::integral_constant<size_t, 0xffffffff>,
std::integral_constant<uint64_t, 64ull << 30>>::type::value;
ClipToRange(&result.write_buffer_size, (static_cast<size_t>(64)) << 10,
clamp_max);
ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, clamp_max);
// if user sets arena_block_size, we trust user to use this value. Otherwise,
// calculate a proper value from writer_buffer_size;
if (result.arena_block_size <= 0) {
@@ -345,18 +349,12 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options,
}
if (result.level_compaction_dynamic_level_bytes) {
if (result.compaction_style != kCompactionStyleLevel) {
ROCKS_LOG_WARN(db_options.info_log.get(),
"level_compaction_dynamic_level_bytes only makes sense"
"for level-based compaction");
result.level_compaction_dynamic_level_bytes = false;
} else if (result.cf_paths.size() > 1U) {
// we don't yet know how to make both of this feature and multiple
if (result.compaction_style != kCompactionStyleLevel ||
result.cf_paths.size() > 1U) {
// 1. level_compaction_dynamic_level_bytes only makes sense for
// level-based compaction.
// 2. we don't yet know how to make both of this feature and multiple
// DB path work.
ROCKS_LOG_WARN(db_options.info_log.get(),
"multiple cf_paths/db_paths and"
"level_compaction_dynamic_level_bytes"
"can't be used together");
result.level_compaction_dynamic_level_bytes = false;
}
}
@@ -501,8 +499,7 @@ std::vector<std::string> ColumnFamilyData::GetDbPaths() const {
return paths;
}
const uint32_t ColumnFamilyData::kDummyColumnFamilyDataId =
std::numeric_limits<uint32_t>::max();
const uint32_t ColumnFamilyData::kDummyColumnFamilyDataId = port::kMaxUint32;
ColumnFamilyData::ColumnFamilyData(
uint32_t id, const std::string& name, Version* _dummy_versions,
@@ -827,8 +824,8 @@ int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger,
// condition.
// Or twice as compaction trigger, if it is smaller.
int64_t res = std::min(twice_level0_trigger, one_fourth_trigger_slowdown);
if (res >= std::numeric_limits<int32_t>::max()) {
return std::numeric_limits<int32_t>::max();
if (res >= port::kMaxInt32) {
return port::kMaxInt32;
} else {
// res fits in int
return static_cast<int>(res);
@@ -1052,10 +1049,6 @@ uint64_t ColumnFamilyData::GetTotalSstFilesSize() const {
return VersionSet::GetTotalSstFilesSize(dummy_versions_);
}
uint64_t ColumnFamilyData::GetTotalBlobFileSize() const {
return VersionSet::GetTotalBlobFileSize(dummy_versions_);
}
uint64_t ColumnFamilyData::GetLiveSstFilesSize() const {
return current_->GetSstFilesSize();
}
@@ -1165,12 +1158,12 @@ Compaction* ColumnFamilyData::CompactRange(
int output_level, const CompactRangeOptions& compact_range_options,
const InternalKey* begin, const InternalKey* end,
InternalKey** compaction_end, bool* conflict,
uint64_t max_file_num_to_ignore, const std::string& trim_ts) {
uint64_t max_file_num_to_ignore) {
auto* result = compaction_picker_->CompactRange(
GetName(), mutable_cf_options, mutable_db_options,
current_->storage_info(), input_level, output_level,
compact_range_options, begin, end, compaction_end, conflict,
max_file_num_to_ignore, trim_ts);
max_file_num_to_ignore);
if (result != nullptr) {
result->SetInputVersion(current_);
}
@@ -1355,20 +1348,13 @@ Status ColumnFamilyData::ValidateOptions(
}
}
if (cf_options.enable_blob_garbage_collection) {
if (cf_options.blob_garbage_collection_age_cutoff < 0.0 ||
cf_options.blob_garbage_collection_age_cutoff > 1.0) {
if (cf_options.enable_blob_garbage_collection &&
(cf_options.blob_garbage_collection_age_cutoff < 0.0 ||
cf_options.blob_garbage_collection_age_cutoff > 1.0)) {
return Status::InvalidArgument(
"The age cutoff for blob garbage collection should be in the range "
"[0.0, 1.0].");
}
if (cf_options.blob_garbage_collection_force_threshold < 0.0 ||
cf_options.blob_garbage_collection_force_threshold > 1.0) {
return Status::InvalidArgument(
"The garbage ratio threshold for forcing blob garbage collection "
"should be in the range [0.0, 1.0].");
}
}
if (cf_options.compaction_style == kCompactionStyleFIFO &&
db_options.max_open_files != -1 && cf_options.ttl > 0) {
@@ -1563,6 +1549,20 @@ ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
return new_cfd;
}
// REQUIRES: DB mutex held
void ColumnFamilySet::FreeDeadColumnFamilies() {
autovector<ColumnFamilyData*> to_delete;
for (auto cfd = dummy_cfd_->next_; cfd != dummy_cfd_; cfd = cfd->next_) {
if (cfd->refs_.load(std::memory_order_relaxed) == 0) {
to_delete.push_back(cfd);
}
}
for (auto cfd : to_delete) {
// this is very rare, so it's not a problem that we do it under a mutex
delete cfd;
}
}
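Per the header declaration below, FreeDeadColumnFamilies() requires the DB mutex and must not be called while iterating over the set. A minimal sketch of that contract, with the mutex and set passed in as stand-ins for DBImpl's members (names illustrative):

void ReapDeadColumnFamilies(InstrumentedMutex* db_mutex,
                            ColumnFamilySet* cf_set) {
  InstrumentedMutexLock l(db_mutex);  // REQUIRES: DB mutex held
  for (ColumnFamilyData* cfd : *cf_set) {
    (void)cfd;  // inspect or schedule work; do not free inside the loop
  }
  // Only safe now that iteration over the set has finished.
  cf_set->FreeDeadColumnFamilies();
}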
// under a DB mutex AND from a write thread
void ColumnFamilySet::RemoveColumnFamily(ColumnFamilyData* cfd) {
auto cfd_iter = column_family_data_.find(cfd->GetID());

@@ -9,10 +9,10 @@
#pragma once
#include <atomic>
#include <string>
#include <unordered_map>
#include <string>
#include <vector>
#include <atomic>
#include "db/memtable_list.h"
#include "db/table_cache.h"
@@ -25,7 +25,6 @@
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "trace_replay/block_cache_tracer.h"
#include "util/hash_containers.h"
#include "util/thread_local.h"
namespace ROCKSDB_NAMESPACE {
@@ -358,7 +357,6 @@ class ColumnFamilyData {
uint64_t GetNumLiveVersions() const; // REQUIRE: DB mutex held
uint64_t GetTotalSstFilesSize() const; // REQUIRE: DB mutex held
uint64_t GetLiveSstFilesSize() const; // REQUIRE: DB mutex held
uint64_t GetTotalBlobFileSize() const; // REQUIRE: DB mutex held
void SetMemtable(MemTable* new_mem) {
uint64_t memtable_id = last_memtable_id_.fetch_add(1) + 1;
new_mem->SetID(memtable_id);
@@ -414,8 +412,7 @@ class ColumnFamilyData {
const CompactRangeOptions& compact_range_options,
const InternalKey* begin, const InternalKey* end,
InternalKey** compaction_end, bool* manual_conflict,
uint64_t max_file_num_to_ignore,
const std::string& trim_ts);
uint64_t max_file_num_to_ignore);
CompactionPicker* compaction_picker() { return compaction_picker_.get(); }
// thread-safe
@@ -521,10 +518,9 @@ class ColumnFamilyData {
ThreadLocalPtr* TEST_GetLocalSV() { return local_sv_.get(); }
WriteBufferManager* write_buffer_mgr() { return write_buffer_manager_; }
static const uint32_t kDummyColumnFamilyDataId;
private:
friend class ColumnFamilySet;
static const uint32_t kDummyColumnFamilyDataId;
ColumnFamilyData(uint32_t id, const std::string& name,
Version* dummy_versions, Cache* table_cache,
WriteBufferManager* write_buffer_manager,
@@ -630,8 +626,10 @@ class ColumnFamilyData {
// held and it needs to be executed from the write thread. SetDropped() also
// guarantees that it will be called only from single-threaded LogAndApply(),
// but this condition is not that important.
// * Iteration -- hold DB mutex. If you want to release the DB mutex in the
// body of the iteration, wrap in a RefedColumnFamilySet.
// * Iteration -- hold DB mutex, but you can release it in the body of
// iteration. If you release DB mutex in body, reference the column
// family before the mutex and unreference after you unlock, since the column
// family might get dropped when the DB mutex is released
// * GetDefault() -- thread safe
// * GetColumnFamily() -- either inside of DB mutex or from a write thread
// * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily(),
@@ -643,12 +641,17 @@ class ColumnFamilySet {
public:
explicit iterator(ColumnFamilyData* cfd)
: current_(cfd) {}
// NOTE: minimum operators for for-loop iteration
iterator& operator++() {
// dropped column families might still be included in this iteration
// (we're only removing them when client drops the last reference to the
// column family).
// dummy is never dead, so this will never be infinite
do {
current_ = current_->next_;
} while (current_->refs_.load(std::memory_order_relaxed) == 0);
return *this;
}
bool operator!=(const iterator& other) const {
bool operator!=(const iterator& other) {
return this->current_ != other.current_;
}
ColumnFamilyData* operator*() { return current_; }
@@ -687,6 +690,10 @@ class ColumnFamilySet {
iterator begin() { return iterator(dummy_cfd_->next_); }
iterator end() { return iterator(dummy_cfd_); }
// REQUIRES: DB mutex held
// Don't call while iterating over ColumnFamilySet
void FreeDeadColumnFamilies();
Cache* get_table_cache() { return table_cache_; }
WriteBufferManager* write_buffer_manager() { return write_buffer_manager_; }
@@ -706,8 +713,8 @@ class ColumnFamilySet {
// * when reading, at least one condition needs to be satisfied:
// 1. DB mutex locked
// 2. accessed from a single-threaded write thread
UnorderedMap<std::string, uint32_t> column_families_;
UnorderedMap<uint32_t, ColumnFamilyData*> column_family_data_;
std::unordered_map<std::string, uint32_t> column_families_;
std::unordered_map<uint32_t, ColumnFamilyData*> column_family_data_;
uint32_t max_column_family_;
const FileOptions file_options_;
@@ -729,55 +736,6 @@ class ColumnFamilySet {
std::string db_session_id_;
};
// A wrapper for ColumnFamilySet that supports releasing DB mutex during each
// iteration over the iterator, because the cfd is Refed and Unrefed during
// each iteration to prevent concurrent CF drop from destroying it (until
// Unref).
class RefedColumnFamilySet {
public:
explicit RefedColumnFamilySet(ColumnFamilySet* cfs) : wrapped_(cfs) {}
class iterator {
public:
explicit iterator(ColumnFamilySet::iterator wrapped) : wrapped_(wrapped) {
MaybeRef(*wrapped_);
}
~iterator() { MaybeUnref(*wrapped_); }
inline void MaybeRef(ColumnFamilyData* cfd) {
if (cfd->GetID() != ColumnFamilyData::kDummyColumnFamilyDataId) {
cfd->Ref();
}
}
inline void MaybeUnref(ColumnFamilyData* cfd) {
if (cfd->GetID() != ColumnFamilyData::kDummyColumnFamilyDataId) {
cfd->UnrefAndTryDelete();
}
}
// NOTE: minimum operators for for-loop iteration
inline iterator& operator++() {
ColumnFamilyData* old = *wrapped_;
++wrapped_;
// Can only unref & potentially free cfd after accessing its next_
MaybeUnref(old);
MaybeRef(*wrapped_);
return *this;
}
inline bool operator!=(const iterator& other) const {
return this->wrapped_ != other.wrapped_;
}
inline ColumnFamilyData* operator*() { return *wrapped_; }
private:
ColumnFamilySet::iterator wrapped_;
};
iterator begin() { return iterator(wrapped_->begin()); }
iterator end() { return iterator(wrapped_->end()); }
private:
ColumnFamilySet* wrapped_;
};
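A minimal usage sketch of the wrapper above, assuming DBImpl-style mutex_ and versions_ members; DoSlowWork() is a hypothetical helper. The per-iteration Ref/Unref is what makes releasing the DB mutex inside the body safe:

  for (ColumnFamilyData* cfd :
       RefedColumnFamilySet(versions_->GetColumnFamilySet())) {
    mutex_.Unlock();
    DoSlowWork(cfd);  // cfd cannot be dropped out from under us
    mutex_.Lock();
  }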
// We use ColumnFamilyMemTablesImpl to provide WriteBatch a way to access
// memtables of different column families (specified by ID in the write batch)
class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables {

@@ -383,7 +383,7 @@ class ColumnFamilyTestBase : public testing::Test {
int NumTableFilesAtLevel(int level, int cf) {
return GetProperty(cf,
"rocksdb.num-files-at-level" + std::to_string(level));
"rocksdb.num-files-at-level" + ToString(level));
}
#ifndef ROCKSDB_LITE
@@ -554,7 +554,7 @@ class ColumnFamilyTest
INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest,
testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest,
testing::Values(kLatestFormatVersion));
testing::Values(test::kLatestFormatVersion));
TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) {
for (int iter = 0; iter < 3; ++iter) {
@@ -746,8 +746,8 @@ INSTANTIATE_TEST_CASE_P(
std::make_tuple(test::kDefaultFormatVersion, false)));
INSTANTIATE_TEST_CASE_P(
FormatLatest, FlushEmptyCFTestWithParam,
testing::Values(std::make_tuple(kLatestFormatVersion, true),
std::make_tuple(kLatestFormatVersion, false)));
testing::Values(std::make_tuple(test::kLatestFormatVersion, true),
std::make_tuple(test::kLatestFormatVersion, false)));
TEST_P(ColumnFamilyTest, AddDrop) {
Open();
@@ -783,7 +783,7 @@ TEST_P(ColumnFamilyTest, BulkAddDrop) {
std::vector<std::string> cf_names;
std::vector<ColumnFamilyHandle*> cf_handles;
for (int i = 1; i <= kNumCF; i++) {
cf_names.push_back("cf1-" + std::to_string(i));
cf_names.push_back("cf1-" + ToString(i));
}
ASSERT_OK(db_->CreateColumnFamilies(cf_options, cf_names, &cf_handles));
for (int i = 1; i <= kNumCF; i++) {
@@ -796,8 +796,7 @@ TEST_P(ColumnFamilyTest, BulkAddDrop) {
}
cf_handles.clear();
for (int i = 1; i <= kNumCF; i++) {
cf_descriptors.emplace_back("cf2-" + std::to_string(i),
ColumnFamilyOptions());
cf_descriptors.emplace_back("cf2-" + ToString(i), ColumnFamilyOptions());
}
ASSERT_OK(db_->CreateColumnFamilies(cf_descriptors, &cf_handles));
for (int i = 1; i <= kNumCF; i++) {
@@ -821,7 +820,7 @@ TEST_P(ColumnFamilyTest, DropTest) {
Open({"default"});
CreateColumnFamiliesAndReopen({"pikachu"});
for (int i = 0; i < 100; ++i) {
ASSERT_OK(Put(1, std::to_string(i), "bar" + std::to_string(i)));
ASSERT_OK(Put(1, ToString(i), "bar" + ToString(i)));
}
ASSERT_OK(Flush(1));
@@ -1345,7 +1344,7 @@ TEST_P(ColumnFamilyTest, DifferentCompactionStyles) {
PutRandomData(1, 10, 12000);
PutRandomData(1, 1, 10);
WaitForFlush(1);
AssertFilesPerLevel(std::to_string(i + 1), 1);
AssertFilesPerLevel(ToString(i + 1), 1);
}
// SETUP column family "two" -- level style with 4 levels
@@ -1353,7 +1352,7 @@ TEST_P(ColumnFamilyTest, DifferentCompactionStyles) {
PutRandomData(2, 10, 12000);
PutRandomData(2, 1, 10);
WaitForFlush(2);
AssertFilesPerLevel(std::to_string(i + 1), 2);
AssertFilesPerLevel(ToString(i + 1), 2);
}
// TRIGGER compaction "one"
@@ -1417,7 +1416,7 @@ TEST_P(ColumnFamilyTest, MultipleManualCompactions) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(std::to_string(i + 1), 1);
AssertFilesPerLevel(ToString(i + 1), 1);
}
bool cf_1_1 = true;
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
@@ -1447,7 +1446,7 @@ TEST_P(ColumnFamilyTest, MultipleManualCompactions) {
PutRandomData(2, 10, 12000);
PutRandomData(2, 1, 10);
WaitForFlush(2);
AssertFilesPerLevel(std::to_string(i + 1), 2);
AssertFilesPerLevel(ToString(i + 1), 2);
}
threads.emplace_back([&] {
TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:1");
@@ -1534,7 +1533,7 @@ TEST_P(ColumnFamilyTest, AutomaticAndManualCompactions) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(std::to_string(i + 1), 1);
AssertFilesPerLevel(ToString(i + 1), 1);
}
TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:1");
@@ -1544,7 +1543,7 @@ TEST_P(ColumnFamilyTest, AutomaticAndManualCompactions) {
PutRandomData(2, 10, 12000);
PutRandomData(2, 1, 10);
WaitForFlush(2);
AssertFilesPerLevel(std::to_string(i + 1), 2);
AssertFilesPerLevel(ToString(i + 1), 2);
}
ROCKSDB_NAMESPACE::port::Thread threads([&] {
CompactRangeOptions compact_options;
@@ -1616,7 +1615,7 @@ TEST_P(ColumnFamilyTest, ManualAndAutomaticCompactions) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(std::to_string(i + 1), 1);
AssertFilesPerLevel(ToString(i + 1), 1);
}
bool cf_1_1 = true;
bool cf_1_2 = true;
@@ -1651,7 +1650,7 @@ TEST_P(ColumnFamilyTest, ManualAndAutomaticCompactions) {
PutRandomData(2, 10, 12000);
PutRandomData(2, 1, 10);
WaitForFlush(2);
AssertFilesPerLevel(std::to_string(i + 1), 2);
AssertFilesPerLevel(ToString(i + 1), 2);
}
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5");
threads.join();
@@ -1710,7 +1709,7 @@ TEST_P(ColumnFamilyTest, SameCFManualManualCompactions) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(std::to_string(i + 1), 1);
AssertFilesPerLevel(ToString(i + 1), 1);
}
bool cf_1_1 = true;
bool cf_1_2 = true;
@@ -1749,8 +1748,8 @@ TEST_P(ColumnFamilyTest, SameCFManualManualCompactions) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(
std::to_string(one.level0_file_num_compaction_trigger + i), 1);
AssertFilesPerLevel(ToString(one.level0_file_num_compaction_trigger + i),
1);
}
ROCKSDB_NAMESPACE::port::Thread threads1([&] {
@@ -1812,7 +1811,7 @@ TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactions) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(std::to_string(i + 1), 1);
AssertFilesPerLevel(ToString(i + 1), 1);
}
bool cf_1_1 = true;
bool cf_1_2 = true;
@@ -1850,8 +1849,8 @@ TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactions) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(
std::to_string(one.level0_file_num_compaction_trigger + i), 1);
AssertFilesPerLevel(ToString(one.level0_file_num_compaction_trigger + i),
1);
}
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1");
@@ -1905,7 +1904,7 @@ TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactionsLevel) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(std::to_string(i + 1), 1);
AssertFilesPerLevel(ToString(i + 1), 1);
}
bool cf_1_1 = true;
bool cf_1_2 = true;
@@ -1943,8 +1942,8 @@ TEST_P(ColumnFamilyTest, SameCFManualAutomaticCompactionsLevel) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(
std::to_string(one.level0_file_num_compaction_trigger + i), 1);
AssertFilesPerLevel(ToString(one.level0_file_num_compaction_trigger + i),
1);
}
TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1");
@@ -2025,7 +2024,7 @@ TEST_P(ColumnFamilyTest, SameCFAutomaticManualCompactions) {
PutRandomData(1, 10, 12000, true);
PutRandomData(1, 1, 10, true);
WaitForFlush(1);
AssertFilesPerLevel(std::to_string(i + 1), 1);
AssertFilesPerLevel(ToString(i + 1), 1);
}
TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:5");
@@ -2976,8 +2975,7 @@ TEST_P(ColumnFamilyTest, FlushCloseWALFiles) {
SpecialEnv env(Env::Default());
db_options_.env = &env;
db_options_.max_background_flushes = 1;
column_family_options_.memtable_factory.reset(
test::NewSpecialSkipListFactory(2));
column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(2));
Open();
CreateColumnFamilies({"one"});
ASSERT_OK(Put(1, "fodor", "mirko"));
@@ -3022,8 +3020,7 @@ TEST_P(ColumnFamilyTest, IteratorCloseWALFile1) {
SpecialEnv env(Env::Default());
db_options_.env = &env;
db_options_.max_background_flushes = 1;
column_family_options_.memtable_factory.reset(
test::NewSpecialSkipListFactory(2));
column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(2));
Open();
CreateColumnFamilies({"one"});
ASSERT_OK(Put(1, "fodor", "mirko"));
@@ -3074,8 +3071,7 @@ TEST_P(ColumnFamilyTest, IteratorCloseWALFile2) {
env.SetBackgroundThreads(2, Env::HIGH);
db_options_.env = &env;
db_options_.max_background_flushes = 1;
column_family_options_.memtable_factory.reset(
test::NewSpecialSkipListFactory(2));
column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(2));
Open();
CreateColumnFamilies({"one"});
ASSERT_OK(Put(1, "fodor", "mirko"));
@@ -3133,8 +3129,7 @@ TEST_P(ColumnFamilyTest, ForwardIteratorCloseWALFile) {
env.SetBackgroundThreads(2, Env::HIGH);
db_options_.env = &env;
db_options_.max_background_flushes = 1;
column_family_options_.memtable_factory.reset(
test::NewSpecialSkipListFactory(3));
column_family_options_.memtable_factory.reset(new SpecialSkipListFactory(3));
column_family_options_.level0_file_num_compaction_trigger = 2;
Open();
CreateColumnFamilies({"one"});
@@ -3408,32 +3403,16 @@ TEST(ColumnFamilyTest, ValidateBlobGCCutoff) {
.IsInvalidArgument());
}
TEST(ColumnFamilyTest, ValidateBlobGCForceThreshold) {
DBOptions db_options;
ColumnFamilyOptions cf_options;
cf_options.enable_blob_garbage_collection = true;
cf_options.blob_garbage_collection_force_threshold = -0.5;
ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options)
.IsInvalidArgument());
cf_options.blob_garbage_collection_force_threshold = 0.0;
ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options));
cf_options.blob_garbage_collection_force_threshold = 0.5;
ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options));
cf_options.blob_garbage_collection_force_threshold = 1.0;
ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options));
cf_options.blob_garbage_collection_force_threshold = 1.5;
ASSERT_TRUE(ColumnFamilyData::ValidateOptions(db_options, cf_options)
.IsInvalidArgument());
}
} // namespace ROCKSDB_NAMESPACE
#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
extern "C" {
void RegisterCustomObjects(int argc, char** argv);
}
#else
void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {}
#endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);

@@ -91,8 +91,8 @@ TEST_F(CompactFilesTest, L0ConflictsFiles) {
// create couple files
// Background compaction starts and waits in BackgroundCallCompaction:0
for (int i = 0; i < kLevel0Trigger * 4; ++i) {
ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), ""));
ASSERT_OK(db->Put(WriteOptions(), std::to_string(100 - i), ""));
ASSERT_OK(db->Put(WriteOptions(), ToString(i), ""));
ASSERT_OK(db->Put(WriteOptions(), ToString(100 - i), ""));
ASSERT_OK(db->Flush(FlushOptions()));
}
@@ -136,20 +136,17 @@ TEST_F(CompactFilesTest, MultipleLevel) {
// create couple files in L0, L3, L4 and L5
for (int i = 5; i > 2; --i) {
collector->ClearFlushedFiles();
ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), ""));
ASSERT_OK(db->Put(WriteOptions(), ToString(i), ""));
ASSERT_OK(db->Flush(FlushOptions()));
// Ensure background work is fully finished including listener callbacks
// before accessing listener state.
ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForBackgroundWork());
auto l0_files = collector->GetFlushedFiles();
ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, i));
std::string prop;
ASSERT_TRUE(db->GetProperty(
"rocksdb.num-files-at-level" + std::to_string(i), &prop));
ASSERT_TRUE(
db->GetProperty("rocksdb.num-files-at-level" + ToString(i), &prop));
ASSERT_EQ("1", prop);
}
ASSERT_OK(db->Put(WriteOptions(), std::to_string(0), ""));
ASSERT_OK(db->Put(WriteOptions(), ToString(0), ""));
ASSERT_OK(db->Flush(FlushOptions()));
ColumnFamilyMetaData meta;
@@ -218,7 +215,7 @@ TEST_F(CompactFilesTest, ObsoleteFiles) {
// create couple files
for (int i = 1000; i < 2000; ++i) {
ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
ASSERT_OK(db->Put(WriteOptions(), ToString(i),
std::string(kWriteBufferSize / 10, 'a' + (i % 26))));
}
@@ -257,14 +254,14 @@ TEST_F(CompactFilesTest, NotCutOutputOnLevel0) {
// create couple files
for (int i = 0; i < 500; ++i) {
ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
ASSERT_OK(db->Put(WriteOptions(), ToString(i),
std::string(1000, 'a' + (i % 26))));
}
ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForFlushMemTable());
auto l0_files_1 = collector->GetFlushedFiles();
collector->ClearFlushedFiles();
for (int i = 0; i < 500; ++i) {
ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
ASSERT_OK(db->Put(WriteOptions(), ToString(i),
std::string(1000, 'a' + (i % 26))));
}
ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForFlushMemTable());
@@ -295,13 +292,10 @@ TEST_F(CompactFilesTest, CapturingPendingFiles) {
// Create 5 files.
for (int i = 0; i < 5; ++i) {
ASSERT_OK(db->Put(WriteOptions(), "key" + std::to_string(i), "value"));
ASSERT_OK(db->Put(WriteOptions(), "key" + ToString(i), "value"));
ASSERT_OK(db->Flush(FlushOptions()));
}
// Ensure background work is fully finished including listener callbacks
// before accessing listener state.
ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForBackgroundWork());
auto l0_files = collector->GetFlushedFiles();
EXPECT_EQ(5, l0_files.size());
@@ -420,9 +414,6 @@ TEST_F(CompactFilesTest, SentinelCompressionType) {
ASSERT_OK(db->Put(WriteOptions(), "key", "val"));
ASSERT_OK(db->Flush(FlushOptions()));
// Ensure background work is fully finished including listener callbacks
// before accessing listener state.
ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForBackgroundWork());
auto l0_files = collector->GetFlushedFiles();
ASSERT_EQ(1, l0_files.size());
@@ -465,7 +456,7 @@ TEST_F(CompactFilesTest, GetCompactionJobInfo) {
// create couple files
for (int i = 0; i < 500; ++i) {
ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
ASSERT_OK(db->Put(WriteOptions(), ToString(i),
std::string(1000, 'a' + (i % 26))));
}
ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForFlushMemTable());

@@ -10,24 +10,22 @@
#include <string>
#include <vector>
#include "db/dbformat.h"
#include "rocksdb/comparator.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/vector_iterator.h"
namespace ROCKSDB_NAMESPACE {
// A vector iterator which does its own bounds checking. This is for testing the
// optimizations in the clipping iterator where we bypass the bounds checking if
// the input iterator has already performed it.
class BoundsCheckingVectorIterator : public VectorIterator {
class BoundsCheckingVectorIterator : public test::VectorIterator {
public:
BoundsCheckingVectorIterator(const std::vector<std::string>& keys,
const std::vector<std::string>& values,
const Slice* start, const Slice* end,
const Comparator* cmp)
: VectorIterator(keys, values, cmp), start_(start), end_(end), cmp_(cmp) {
: VectorIterator(keys, values), start_(start), end_(end), cmp_(cmp) {
assert(cmp_);
}
@@ -107,7 +105,7 @@ TEST_P(ClippingIteratorTest, Clip) {
use_bounds_checking_vec_it
? new BoundsCheckingVectorIterator(input_keys, input_values, &start,
&end, BytewiseComparator())
: new VectorIterator(input_keys, input_values, BytewiseComparator()));
: new test::VectorIterator(input_keys, input_values));
ClippingIterator clip(input.get(), &start, &end, BytewiseComparator());

@@ -213,8 +213,8 @@ Compaction::Compaction(
uint32_t _output_path_id, CompressionType _compression,
CompressionOptions _compression_opts, Temperature _output_temperature,
uint32_t _max_subcompactions, std::vector<FileMetaData*> _grandparents,
bool _manual_compaction, const std::string& _trim_ts, double _score,
bool _deletion_compaction, CompactionReason _compaction_reason)
bool _manual_compaction, double _score, bool _deletion_compaction,
CompactionReason _compaction_reason)
: input_vstorage_(vstorage),
start_level_(_inputs[0].level),
output_level_(_output_level),
@@ -237,7 +237,6 @@ Compaction::Compaction(
bottommost_level_(IsBottommostLevel(output_level_, vstorage, inputs_)),
is_full_compaction_(IsFullCompaction(vstorage, inputs_)),
is_manual_compaction_(_manual_compaction),
trim_ts_(_trim_ts),
is_trivial_move_(false),
compaction_reason_(_compaction_reason),
notify_on_compaction_completion_(false) {
@@ -278,8 +277,8 @@ Compaction::~Compaction() {
bool Compaction::InputCompressionMatchesOutput() const {
int base_level = input_vstorage_->base_level();
bool matches =
(GetCompressionType(input_vstorage_, mutable_cf_options_, start_level_,
bool matches = (GetCompressionType(immutable_options_, input_vstorage_,
mutable_cf_options_, start_level_,
base_level) == output_compression_);
if (matches) {
TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:Matches");
@@ -319,8 +318,7 @@ bool Compaction::IsTrivialMove() const {
// Used in universal compaction, where trivial move can be done if the
// input files are non overlapping
if ((mutable_cf_options_.compaction_options_universal.allow_trivial_move) &&
(output_level_ != 0) &&
(cfd_->ioptions()->compaction_style == kCompactionStyleUniversal)) {
(output_level_ != 0)) {
return is_trivial_move_;
}
@@ -518,7 +516,7 @@ uint64_t Compaction::OutputFilePreallocationSize() const {
}
}
if (max_output_file_size_ != std::numeric_limits<uint64_t>::max() &&
if (max_output_file_size_ != port::kMaxUint64 &&
(immutable_options_.compaction_style == kCompactionStyleLevel ||
output_level() > 0)) {
preallocation_size = std::min(max_output_file_size_, preallocation_size);
@@ -614,19 +612,10 @@ bool Compaction::DoesInputReferenceBlobFiles() const {
return false;
}
uint64_t Compaction::MinInputFileOldestAncesterTime(
const InternalKey* start, const InternalKey* end) const {
uint64_t min_oldest_ancester_time = std::numeric_limits<uint64_t>::max();
const InternalKeyComparator& icmp =
column_family_data()->internal_comparator();
uint64_t Compaction::MinInputFileOldestAncesterTime() const {
uint64_t min_oldest_ancester_time = port::kMaxUint64;
for (const auto& level_files : inputs_) {
for (const auto& file : level_files.files) {
if (start != nullptr && icmp.Compare(file->largest, *start) < 0) {
continue;
}
if (end != nullptr && icmp.Compare(file->smallest, *end) > 0) {
continue;
}
uint64_t oldest_ancester_time = file->TryGetOldestAncesterTime();
if (oldest_ancester_time != 0) {
min_oldest_ancester_time =

@@ -79,8 +79,8 @@ class Compaction {
CompressionOptions compression_opts,
Temperature output_temperature, uint32_t max_subcompactions,
std::vector<FileMetaData*> grandparents,
bool manual_compaction = false, const std::string& trim_ts = "",
double score = -1, bool deletion_compaction = false,
bool manual_compaction = false, double score = -1,
bool deletion_compaction = false,
CompactionReason compaction_reason = CompactionReason::kUnknown);
// No copying allowed
@@ -208,8 +206,6 @@ class Compaction {
// Was this compaction triggered manually by the client?
bool is_manual_compaction() const { return is_manual_compaction_; }
std::string trim_ts() const { return trim_ts_; }
// Used when allow_trivial_move option is set in
// Universal compaction. If all the input files are
// non overlapping, then is_trivial_move_ variable
@@ -294,7 +292,7 @@
int GetInputBaseLevel() const;
CompactionReason compaction_reason() const { return compaction_reason_; }
CompactionReason compaction_reason() { return compaction_reason_; }
const std::vector<FileMetaData*>& grandparents() const {
return grandparents_;
@@ -306,10 +304,7 @@
uint32_t max_subcompactions() const { return max_subcompactions_; }
// start and end are sub compact range. Null if no boundary.
// This is used to filter out some input files' ancester's time range.
uint64_t MinInputFileOldestAncesterTime(const InternalKey* start,
const InternalKey* end) const;
uint64_t MinInputFileOldestAncesterTime() const;
// Called by DBImpl::NotifyOnCompactionCompleted to make sure number of
// compaction begin and compaction completion callbacks match.
@@ -387,9 +382,6 @@ class Compaction {
// Is this compaction requested by the client?
const bool is_manual_compaction_;
// The data with timestamp > trim_ts_ will be removed
const std::string trim_ts_;
// True if we can do trivial move in Universal multi level
// compaction
bool is_trivial_move_;
