Compare commits
36 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
b9a4a10659 | ||
|
1c63b82b9a | ||
|
64dbc7813e | ||
|
466af89356 | ||
|
614c6d453e | ||
|
03492d61d4 | ||
|
49f97dc93e | ||
|
fc38b0d5fe | ||
|
3598e1a9a9 | ||
|
d07167c9a3 | ||
|
931e6704af | ||
|
e8e81bc39f | ||
|
689c15523f | ||
|
1369736b18 | ||
|
95dc6cd6ed | ||
|
641fae60f6 | ||
|
b7434c29d2 | ||
|
a1774dde9a | ||
|
65b2298510 | ||
|
acba14b3d9 | ||
|
53f760b8a8 | ||
|
35c05bca0f | ||
|
9ae0528dc4 | ||
|
4eeb1bf0a6 | ||
|
3bcc31295a | ||
|
e78f5cfba7 | ||
|
97773d0967 | ||
|
35c950a94e | ||
|
e265e08a02 | ||
|
663d24f467 | ||
|
ec43385bf3 | ||
|
8a643b70fd | ||
|
de0891ec01 | ||
|
33564d2c10 | ||
|
96de211f4c | ||
|
8522d9c74d |
@ -71,7 +71,10 @@ install:
|
|||||||
CC=gcc-8 && CXX=g++-8;
|
CC=gcc-8 && CXX=g++-8;
|
||||||
fi
|
fi
|
||||||
- if [[ "${JOB_NAME}" == cmake* ]] && [ "${TRAVIS_OS_NAME}" == linux ]; then
|
- if [[ "${JOB_NAME}" == cmake* ]] && [ "${TRAVIS_OS_NAME}" == linux ]; then
|
||||||
mkdir cmake-dist && curl -sfSL https://cmake.org/files/v3.8/cmake-3.8.1-Linux-x86_64.tar.gz | tar --strip-components=1 -C cmake-dist -xz && export PATH=$PWD/cmake-dist/bin:$PATH;
|
mkdir cmake-dist && curl --silent --fail --show-error --location https://github.com/Kitware/CMake/releases/download/v3.14.5/cmake-3.14.5-Linux-x86_64.tar.gz | tar --strip-components=1 -C cmake-dist -xz && export PATH=$PWD/cmake-dist/bin:$PATH;
|
||||||
|
fi
|
||||||
|
- if [[ "${JOB_NAME}" == java_test ]]; then
|
||||||
|
java -version && echo "JAVA_HOME=${JAVA_HOME}";
|
||||||
fi
|
fi
|
||||||
|
|
||||||
before_script:
|
before_script:
|
||||||
|
@ -504,7 +504,6 @@ set(SOURCES
|
|||||||
db/merge_helper.cc
|
db/merge_helper.cc
|
||||||
db/merge_operator.cc
|
db/merge_operator.cc
|
||||||
db/range_del_aggregator.cc
|
db/range_del_aggregator.cc
|
||||||
db/range_del_aggregator_v2.cc
|
|
||||||
db/range_tombstone_fragmenter.cc
|
db/range_tombstone_fragmenter.cc
|
||||||
db/repair.cc
|
db/repair.cc
|
||||||
db/snapshot_impl.cc
|
db/snapshot_impl.cc
|
||||||
@ -907,7 +906,6 @@ if(WITH_TESTS)
|
|||||||
db/plain_table_db_test.cc
|
db/plain_table_db_test.cc
|
||||||
db/prefix_test.cc
|
db/prefix_test.cc
|
||||||
db/range_del_aggregator_test.cc
|
db/range_del_aggregator_test.cc
|
||||||
db/range_del_aggregator_v2_test.cc
|
|
||||||
db/range_tombstone_fragmenter_test.cc
|
db/range_tombstone_fragmenter_test.cc
|
||||||
db/repair_test.cc
|
db/repair_test.cc
|
||||||
db/table_properties_collector_test.cc
|
db/table_properties_collector_test.cc
|
||||||
|
19
HISTORY.md
19
HISTORY.md
@ -1,11 +1,17 @@
|
|||||||
# Rocksdb Change Log
|
# Rocksdb Change Log
|
||||||
## Unreleased
|
## 5.18.4 (3/3/2020)
|
||||||
### New Features
|
* Various fixes for ARM64 support (#6250)
|
||||||
|
* Fix JEMALLOC_CXX_THROW macro missing from older Jemalloc versions, causing build failures on some platforms.
|
||||||
|
|
||||||
|
## 5.18.3 (2/11/2019)
|
||||||
|
### Bug Fixes
|
||||||
|
* Fix possible LSM corruption when both range deletions and subcompactions are used. The symptom of this corruption is L1+ files overlapping in the user key space.
|
||||||
|
|
||||||
|
## 5.18.2 (01/31/2019)
|
||||||
|
|
||||||
### Public API Change
|
### Public API Change
|
||||||
|
* Change time resolution in FileOperationInfo.
|
||||||
### Bug Fixes
|
* Deleting Blob files also go through SStFileManager.
|
||||||
* Fix a deadlock caused by compaction and file ingestion waiting for each other in the event of write stalls.
|
|
||||||
|
|
||||||
## 5.18.0 (11/30/2018)
|
## 5.18.0 (11/30/2018)
|
||||||
### New Features
|
### New Features
|
||||||
@ -18,6 +24,7 @@
|
|||||||
* Add xxhash64 checksum support
|
* Add xxhash64 checksum support
|
||||||
* Introduced `MemoryAllocator`, which lets the user specify custom memory allocator for block based table.
|
* Introduced `MemoryAllocator`, which lets the user specify custom memory allocator for block based table.
|
||||||
* Improved `DeleteRange` to prevent read performance degradation. The feature is no longer marked as experimental.
|
* Improved `DeleteRange` to prevent read performance degradation. The feature is no longer marked as experimental.
|
||||||
|
* Enabled checkpoint on readonly db (DBImplReadOnly).
|
||||||
|
|
||||||
### Public API Change
|
### Public API Change
|
||||||
* `DBOptions::use_direct_reads` now affects reads issued by `BackupEngine` on the database's SSTs.
|
* `DBOptions::use_direct_reads` now affects reads issued by `BackupEngine` on the database's SSTs.
|
||||||
@ -34,6 +41,8 @@
|
|||||||
* Fixed Get correctness bug in the presence of range tombstones where merge operands covered by a range tombstone always result in NotFound.
|
* Fixed Get correctness bug in the presence of range tombstones where merge operands covered by a range tombstone always result in NotFound.
|
||||||
* Start populating `NO_FILE_CLOSES` ticker statistic, which was always zero previously.
|
* Start populating `NO_FILE_CLOSES` ticker statistic, which was always zero previously.
|
||||||
* The default value of NewBloomFilterPolicy()'s argument use_block_based_builder is changed to false. Note that this new default may cause large temp memory usage when building very large SST files.
|
* The default value of NewBloomFilterPolicy()'s argument use_block_based_builder is changed to false. Note that this new default may cause large temp memory usage when building very large SST files.
|
||||||
|
* Fix a deadlock caused by compaction and file ingestion waiting for each other in the event of write stalls.
|
||||||
|
* Make DB ignore dropped column families while committing results of atomic flush.
|
||||||
|
|
||||||
## 5.17.0 (10/05/2018)
|
## 5.17.0 (10/05/2018)
|
||||||
### Public API Change
|
### Public API Change
|
||||||
|
@ -43,6 +43,8 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
|
|||||||
command line flags processing. You can compile rocksdb library even
|
command line flags processing. You can compile rocksdb library even
|
||||||
if you don't have gflags installed.
|
if you don't have gflags installed.
|
||||||
|
|
||||||
|
* If you wish to build the RocksJava static target, then cmake is required for building Snappy.
|
||||||
|
|
||||||
## Supported platforms
|
## Supported platforms
|
||||||
|
|
||||||
* **Linux - Ubuntu**
|
* **Linux - Ubuntu**
|
||||||
|
72
Makefile
72
Makefile
@ -137,6 +137,12 @@ CFLAGS += -DHAVE_POWER8
|
|||||||
HAVE_POWER8=1
|
HAVE_POWER8=1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq (,$(shell $(CXX) -fsyntax-only -march=armv8-a+crc -xc /dev/null 2>&1))
|
||||||
|
CXXFLAGS += -march=armv8-a+crc
|
||||||
|
CFLAGS += -march=armv8-a+crc
|
||||||
|
ARMCRC_SOURCE=1
|
||||||
|
endif
|
||||||
|
|
||||||
# if we're compiling for release, compile without debug code (-DNDEBUG)
|
# if we're compiling for release, compile without debug code (-DNDEBUG)
|
||||||
ifeq ($(DEBUG_LEVEL),0)
|
ifeq ($(DEBUG_LEVEL),0)
|
||||||
OPT += -DNDEBUG
|
OPT += -DNDEBUG
|
||||||
@ -543,7 +549,6 @@ TESTS = \
|
|||||||
persistent_cache_test \
|
persistent_cache_test \
|
||||||
statistics_test \
|
statistics_test \
|
||||||
lua_test \
|
lua_test \
|
||||||
range_del_aggregator_test \
|
|
||||||
lru_cache_test \
|
lru_cache_test \
|
||||||
object_registry_test \
|
object_registry_test \
|
||||||
repair_test \
|
repair_test \
|
||||||
@ -554,7 +559,7 @@ TESTS = \
|
|||||||
trace_analyzer_test \
|
trace_analyzer_test \
|
||||||
repeatable_thread_test \
|
repeatable_thread_test \
|
||||||
range_tombstone_fragmenter_test \
|
range_tombstone_fragmenter_test \
|
||||||
range_del_aggregator_v2_test \
|
range_del_aggregator_test \
|
||||||
sst_file_reader_test \
|
sst_file_reader_test \
|
||||||
|
|
||||||
PARALLEL_TEST = \
|
PARALLEL_TEST = \
|
||||||
@ -1588,9 +1593,6 @@ repeatable_thread_test: util/repeatable_thread_test.o $(LIBOBJECTS) $(TESTHARNES
|
|||||||
range_tombstone_fragmenter_test: db/range_tombstone_fragmenter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
|
range_tombstone_fragmenter_test: db/range_tombstone_fragmenter_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(AM_LINK)
|
$(AM_LINK)
|
||||||
|
|
||||||
range_del_aggregator_v2_test: db/range_del_aggregator_v2_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
|
|
||||||
$(AM_LINK)
|
|
||||||
|
|
||||||
sst_file_reader_test: table/sst_file_reader_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
sst_file_reader_test: table/sst_file_reader_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(AM_LINK)
|
$(AM_LINK)
|
||||||
|
|
||||||
@ -1639,7 +1641,7 @@ JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux
|
|||||||
ifeq ($(PLATFORM), OS_SOLARIS)
|
ifeq ($(PLATFORM), OS_SOLARIS)
|
||||||
ARCH := $(shell isainfo -b)
|
ARCH := $(shell isainfo -b)
|
||||||
else ifeq ($(PLATFORM), OS_OPENBSD)
|
else ifeq ($(PLATFORM), OS_OPENBSD)
|
||||||
ifneq (,$(filter $(MACHINE), amd64 arm64 sparc64))
|
ifneq (,$(filter amd64 ppc64 ppc64le arm64 aarch64 sparc64, $(MACHINE)))
|
||||||
ARCH := 64
|
ARCH := 64
|
||||||
else
|
else
|
||||||
ARCH := 32
|
ARCH := 32
|
||||||
@ -1648,10 +1650,10 @@ else
|
|||||||
ARCH := $(shell getconf LONG_BIT)
|
ARCH := $(shell getconf LONG_BIT)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq (,$(findstring ppc,$(MACHINE)))
|
ifneq (,$(filter ppc% arm64 aarch64 sparc64, $(MACHINE)))
|
||||||
ROCKSDBJNILIB = librocksdbjni-linux$(ARCH).so
|
|
||||||
else
|
|
||||||
ROCKSDBJNILIB = librocksdbjni-linux-$(MACHINE).so
|
ROCKSDBJNILIB = librocksdbjni-linux-$(MACHINE).so
|
||||||
|
else
|
||||||
|
ROCKSDBJNILIB = librocksdbjni-linux$(ARCH).so
|
||||||
endif
|
endif
|
||||||
ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux$(ARCH).jar
|
ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-linux$(ARCH).jar
|
||||||
ROCKSDB_JAR_ALL = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH).jar
|
ROCKSDB_JAR_ALL = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH).jar
|
||||||
@ -1664,15 +1666,15 @@ ZLIB_SHA256 ?= c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1
|
|||||||
ZLIB_DOWNLOAD_BASE ?= http://zlib.net
|
ZLIB_DOWNLOAD_BASE ?= http://zlib.net
|
||||||
BZIP2_VER ?= 1.0.6
|
BZIP2_VER ?= 1.0.6
|
||||||
BZIP2_SHA256 ?= a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd
|
BZIP2_SHA256 ?= a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd
|
||||||
BZIP2_DOWNLOAD_BASE ?= https://web.archive.org/web/20180624184835/http://www.bzip.org
|
BZIP2_DOWNLOAD_BASE ?= https://downloads.sourceforge.net/project/bzip2
|
||||||
SNAPPY_VER ?= 1.1.4
|
SNAPPY_VER ?= 1.1.8
|
||||||
SNAPPY_SHA256 ?= 134bfe122fd25599bb807bb8130e7ba6d9bdb851e0b16efcb83ac4f5d0b70057
|
SNAPPY_SHA256 ?= 16b677f07832a612b0836178db7f374e414f94657c138e6993cbfc5dcc58651f
|
||||||
SNAPPY_DOWNLOAD_BASE ?= https://github.com/google/snappy/releases/download
|
SNAPPY_DOWNLOAD_BASE ?= https://github.com/google/snappy/archive
|
||||||
LZ4_VER ?= 1.8.0
|
LZ4_VER ?= 1.9.2
|
||||||
LZ4_SHA256 ?= 2ca482ea7a9bb103603108b5a7510b7592b90158c151ff50a28f1ca8389fccf6
|
LZ4_SHA256 ?= 658ba6191fa44c92280d4aa2c271b0f4fbc0e34d249578dd05e50e76d0e5efcc
|
||||||
LZ4_DOWNLOAD_BASE ?= https://github.com/lz4/lz4/archive
|
LZ4_DOWNLOAD_BASE ?= https://github.com/lz4/lz4/archive
|
||||||
ZSTD_VER ?= 1.3.3
|
ZSTD_VER ?= 1.4.4
|
||||||
ZSTD_SHA256 ?= a77c47153ee7de02626c5b2a097005786b71688be61e9fb81806a011f90b297b
|
ZSTD_SHA256 ?= a364f5162c7d1a455cc915e8e3cf5f4bd8b75d09bc0f53965b0c9ca1383c52c8
|
||||||
ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive
|
ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive
|
||||||
CURL_SSL_OPTS ?= --tlsv1
|
CURL_SSL_OPTS ?= --tlsv1
|
||||||
|
|
||||||
@ -1711,7 +1713,9 @@ endif
|
|||||||
|
|
||||||
libz.a:
|
libz.a:
|
||||||
-rm -rf zlib-$(ZLIB_VER)
|
-rm -rf zlib-$(ZLIB_VER)
|
||||||
curl -O -L ${ZLIB_DOWNLOAD_BASE}/zlib-$(ZLIB_VER).tar.gz
|
ifeq (,$(wildcard ./zlib-$(ZLIB_VER).tar.gz))
|
||||||
|
curl --fail --output zlib-$(ZLIB_VER).tar.gz --location ${ZLIB_DOWNLOAD_BASE}/zlib-$(ZLIB_VER).tar.gz
|
||||||
|
endif
|
||||||
ZLIB_SHA256_ACTUAL=`$(SHA256_CMD) zlib-$(ZLIB_VER).tar.gz | cut -d ' ' -f 1`; \
|
ZLIB_SHA256_ACTUAL=`$(SHA256_CMD) zlib-$(ZLIB_VER).tar.gz | cut -d ' ' -f 1`; \
|
||||||
if [ "$(ZLIB_SHA256)" != "$$ZLIB_SHA256_ACTUAL" ]; then \
|
if [ "$(ZLIB_SHA256)" != "$$ZLIB_SHA256_ACTUAL" ]; then \
|
||||||
echo zlib-$(ZLIB_VER).tar.gz checksum mismatch, expected=\"$(ZLIB_SHA256)\" actual=\"$$ZLIB_SHA256_ACTUAL\"; \
|
echo zlib-$(ZLIB_VER).tar.gz checksum mismatch, expected=\"$(ZLIB_SHA256)\" actual=\"$$ZLIB_SHA256_ACTUAL\"; \
|
||||||
@ -1723,7 +1727,9 @@ libz.a:
|
|||||||
|
|
||||||
libbz2.a:
|
libbz2.a:
|
||||||
-rm -rf bzip2-$(BZIP2_VER)
|
-rm -rf bzip2-$(BZIP2_VER)
|
||||||
curl -O -L ${BZIP2_DOWNLOAD_BASE}/$(BZIP2_VER)/bzip2-$(BZIP2_VER).tar.gz
|
ifeq (,$(wildcard ./bzip2-$(BZIP2_VER).tar.gz))
|
||||||
|
curl --fail --output bzip2-$(BZIP2_VER).tar.gz --location ${CURL_SSL_OPTS} ${BZIP2_DOWNLOAD_BASE}/bzip2-$(BZIP2_VER).tar.gz
|
||||||
|
endif
|
||||||
BZIP2_SHA256_ACTUAL=`$(SHA256_CMD) bzip2-$(BZIP2_VER).tar.gz | cut -d ' ' -f 1`; \
|
BZIP2_SHA256_ACTUAL=`$(SHA256_CMD) bzip2-$(BZIP2_VER).tar.gz | cut -d ' ' -f 1`; \
|
||||||
if [ "$(BZIP2_SHA256)" != "$$BZIP2_SHA256_ACTUAL" ]; then \
|
if [ "$(BZIP2_SHA256)" != "$$BZIP2_SHA256_ACTUAL" ]; then \
|
||||||
echo bzip2-$(BZIP2_VER).tar.gz checksum mismatch, expected=\"$(BZIP2_SHA256)\" actual=\"$$BZIP2_SHA256_ACTUAL\"; \
|
echo bzip2-$(BZIP2_VER).tar.gz checksum mismatch, expected=\"$(BZIP2_SHA256)\" actual=\"$$BZIP2_SHA256_ACTUAL\"; \
|
||||||
@ -1735,21 +1741,24 @@ libbz2.a:
|
|||||||
|
|
||||||
libsnappy.a:
|
libsnappy.a:
|
||||||
-rm -rf snappy-$(SNAPPY_VER)
|
-rm -rf snappy-$(SNAPPY_VER)
|
||||||
curl -O -L ${CURL_SSL_OPTS} ${SNAPPY_DOWNLOAD_BASE}/$(SNAPPY_VER)/snappy-$(SNAPPY_VER).tar.gz
|
ifeq (,$(wildcard ./snappy-$(SNAPPY_VER).tar.gz))
|
||||||
|
curl --fail --output snappy-$(SNAPPY_VER).tar.gz --location ${CURL_SSL_OPTS} ${SNAPPY_DOWNLOAD_BASE}/$(SNAPPY_VER).tar.gz
|
||||||
|
endif
|
||||||
SNAPPY_SHA256_ACTUAL=`$(SHA256_CMD) snappy-$(SNAPPY_VER).tar.gz | cut -d ' ' -f 1`; \
|
SNAPPY_SHA256_ACTUAL=`$(SHA256_CMD) snappy-$(SNAPPY_VER).tar.gz | cut -d ' ' -f 1`; \
|
||||||
if [ "$(SNAPPY_SHA256)" != "$$SNAPPY_SHA256_ACTUAL" ]; then \
|
if [ "$(SNAPPY_SHA256)" != "$$SNAPPY_SHA256_ACTUAL" ]; then \
|
||||||
echo snappy-$(SNAPPY_VER).tar.gz checksum mismatch, expected=\"$(SNAPPY_SHA256)\" actual=\"$$SNAPPY_SHA256_ACTUAL\"; \
|
echo snappy-$(SNAPPY_VER).tar.gz checksum mismatch, expected=\"$(SNAPPY_SHA256)\" actual=\"$$SNAPPY_SHA256_ACTUAL\"; \
|
||||||
exit 1; \
|
exit 1; \
|
||||||
fi
|
fi
|
||||||
tar xvzf snappy-$(SNAPPY_VER).tar.gz
|
tar xvzf snappy-$(SNAPPY_VER).tar.gz
|
||||||
cd snappy-$(SNAPPY_VER) && CFLAGS='${EXTRA_CFLAGS}' CXXFLAGS='${EXTRA_CXXFLAGS}' LDFLAGS='${EXTRA_LDFLAGS}' ./configure --with-pic --enable-static --disable-shared
|
mkdir snappy-$(SNAPPY_VER)/build
|
||||||
cd snappy-$(SNAPPY_VER) && $(MAKE) ${SNAPPY_MAKE_TARGET}
|
cd snappy-$(SNAPPY_VER)/build && CFLAGS='${EXTRA_CFLAGS}' CXXFLAGS='${EXTRA_CXXFLAGS}' LDFLAGS='${EXTRA_LDFLAGS}' cmake -DCMAKE_POSITION_INDEPENDENT_CODE=ON .. && $(MAKE) ${SNAPPY_MAKE_TARGET}
|
||||||
cp snappy-$(SNAPPY_VER)/.libs/libsnappy.a .
|
cp snappy-$(SNAPPY_VER)/build/libsnappy.a .
|
||||||
|
|
||||||
liblz4.a:
|
liblz4.a:
|
||||||
-rm -rf lz4-$(LZ4_VER)
|
-rm -rf lz4-$(LZ4_VER)
|
||||||
curl -O -L ${CURL_SSL_OPTS} ${LZ4_DOWNLOAD_BASE}/v$(LZ4_VER).tar.gz
|
ifeq (,$(wildcard ./lz4-$(LZ4_VER).tar.gz))
|
||||||
mv v$(LZ4_VER).tar.gz lz4-$(LZ4_VER).tar.gz
|
curl --fail --output lz4-$(LZ4_VER).tar.gz --location ${CURL_SSL_OPTS} ${LZ4_DOWNLOAD_BASE}/v$(LZ4_VER).tar.gz
|
||||||
|
endif
|
||||||
LZ4_SHA256_ACTUAL=`$(SHA256_CMD) lz4-$(LZ4_VER).tar.gz | cut -d ' ' -f 1`; \
|
LZ4_SHA256_ACTUAL=`$(SHA256_CMD) lz4-$(LZ4_VER).tar.gz | cut -d ' ' -f 1`; \
|
||||||
if [ "$(LZ4_SHA256)" != "$$LZ4_SHA256_ACTUAL" ]; then \
|
if [ "$(LZ4_SHA256)" != "$$LZ4_SHA256_ACTUAL" ]; then \
|
||||||
echo lz4-$(LZ4_VER).tar.gz checksum mismatch, expected=\"$(LZ4_SHA256)\" actual=\"$$LZ4_SHA256_ACTUAL\"; \
|
echo lz4-$(LZ4_VER).tar.gz checksum mismatch, expected=\"$(LZ4_SHA256)\" actual=\"$$LZ4_SHA256_ACTUAL\"; \
|
||||||
@ -1761,8 +1770,9 @@ liblz4.a:
|
|||||||
|
|
||||||
libzstd.a:
|
libzstd.a:
|
||||||
-rm -rf zstd-$(ZSTD_VER)
|
-rm -rf zstd-$(ZSTD_VER)
|
||||||
curl -O -L ${CURL_SSL_OPTS} ${ZSTD_DOWNLOAD_BASE}/v$(ZSTD_VER).tar.gz
|
ifeq (,$(wildcard ./zstd-$(ZSTD_VER).tar.gz))
|
||||||
mv v$(ZSTD_VER).tar.gz zstd-$(ZSTD_VER).tar.gz
|
curl --fail --output zstd-$(ZSTD_VER).tar.gz --location ${CURL_SSL_OPTS} ${ZSTD_DOWNLOAD_BASE}/v$(ZSTD_VER).tar.gz
|
||||||
|
endif
|
||||||
ZSTD_SHA256_ACTUAL=`$(SHA256_CMD) zstd-$(ZSTD_VER).tar.gz | cut -d ' ' -f 1`; \
|
ZSTD_SHA256_ACTUAL=`$(SHA256_CMD) zstd-$(ZSTD_VER).tar.gz | cut -d ' ' -f 1`; \
|
||||||
if [ "$(ZSTD_SHA256)" != "$$ZSTD_SHA256_ACTUAL" ]; then \
|
if [ "$(ZSTD_SHA256)" != "$$ZSTD_SHA256_ACTUAL" ]; then \
|
||||||
echo zstd-$(ZSTD_VER).tar.gz checksum mismatch, expected=\"$(ZSTD_SHA256)\" actual=\"$$ZSTD_SHA256_ACTUAL\"; \
|
echo zstd-$(ZSTD_VER).tar.gz checksum mismatch, expected=\"$(ZSTD_SHA256)\" actual=\"$$ZSTD_SHA256_ACTUAL\"; \
|
||||||
@ -1853,6 +1863,14 @@ rocksdbjavastaticdockerppc64le:
|
|||||||
fi
|
fi
|
||||||
docker start -a rocksdb_linux_ppc64le-be
|
docker start -a rocksdb_linux_ppc64le-be
|
||||||
|
|
||||||
|
rocksdbjavastaticdockerarm64v8:
|
||||||
|
mkdir -p java/target
|
||||||
|
DOCKER_LINUX_ARM64V8_CONTAINER=`docker ps -aqf name=rocksdb_linux_arm64v8-be`; \
|
||||||
|
if [ -z "$$DOCKER_LINUX_ARM64V8_CONTAINER" ]; then \
|
||||||
|
docker container create --attach stdin --attach stdout --attach stderr --volume `pwd`:/rocksdb-host --name rocksdb_linux_arm64v8-be evolvedbinary/rocksjava:centos7_arm64v8-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh; \
|
||||||
|
fi
|
||||||
|
docker start -a rocksdb_linux_arm64v8-be
|
||||||
|
|
||||||
rocksdbjavastaticpublish: rocksdbjavastaticrelease rocksdbjavastaticpublishcentral
|
rocksdbjavastaticpublish: rocksdbjavastaticrelease rocksdbjavastaticpublishcentral
|
||||||
|
|
||||||
rocksdbjavastaticpublishdocker: rocksdbjavastaticreleasedocker rocksdbjavastaticpublishcentral
|
rocksdbjavastaticpublishdocker: rocksdbjavastaticreleasedocker rocksdbjavastaticpublishcentral
|
||||||
|
14
TARGETS
14
TARGETS
@ -67,13 +67,11 @@ is_opt_mode = build_mode.startswith("opt")
|
|||||||
if is_opt_mode:
|
if is_opt_mode:
|
||||||
rocksdb_compiler_flags.append("-DNDEBUG")
|
rocksdb_compiler_flags.append("-DNDEBUG")
|
||||||
|
|
||||||
default_allocator = read_config("fbcode", "default_allocator")
|
|
||||||
|
|
||||||
sanitizer = read_config("fbcode", "sanitizer")
|
sanitizer = read_config("fbcode", "sanitizer")
|
||||||
|
|
||||||
# Let RocksDB aware of jemalloc existence.
|
# Do not enable jemalloc if sanitizer presents. RocksDB will further detect
|
||||||
# Do not enable it if sanitizer presents.
|
# whether the binary is linked with jemalloc at runtime.
|
||||||
if is_opt_mode and default_allocator.startswith("jemalloc") and sanitizer == "":
|
if sanitizer == "":
|
||||||
rocksdb_compiler_flags.append("-DROCKSDB_JEMALLOC")
|
rocksdb_compiler_flags.append("-DROCKSDB_JEMALLOC")
|
||||||
rocksdb_external_deps.append(("jemalloc", None, "headers"))
|
rocksdb_external_deps.append(("jemalloc", None, "headers"))
|
||||||
|
|
||||||
@ -124,7 +122,6 @@ cpp_library(
|
|||||||
"db/merge_helper.cc",
|
"db/merge_helper.cc",
|
||||||
"db/merge_operator.cc",
|
"db/merge_operator.cc",
|
||||||
"db/range_del_aggregator.cc",
|
"db/range_del_aggregator.cc",
|
||||||
"db/range_del_aggregator_v2.cc",
|
|
||||||
"db/range_tombstone_fragmenter.cc",
|
"db/range_tombstone_fragmenter.cc",
|
||||||
"db/repair.cc",
|
"db/repair.cc",
|
||||||
"db/snapshot_impl.cc",
|
"db/snapshot_impl.cc",
|
||||||
@ -935,11 +932,6 @@ ROCKS_TESTS = [
|
|||||||
"db/range_del_aggregator_test.cc",
|
"db/range_del_aggregator_test.cc",
|
||||||
"serial",
|
"serial",
|
||||||
],
|
],
|
||||||
[
|
|
||||||
"range_del_aggregator_v2_test",
|
|
||||||
"db/range_del_aggregator_v2_test.cc",
|
|
||||||
"serial",
|
|
||||||
],
|
|
||||||
[
|
[
|
||||||
"range_tombstone_fragmenter_test",
|
"range_tombstone_fragmenter_test",
|
||||||
"db/range_tombstone_fragmenter_test.cc",
|
"db/range_tombstone_fragmenter_test.cc",
|
||||||
|
74
appveyor.yml
74
appveyor.yml
@ -1,15 +1,75 @@
|
|||||||
version: 1.0.{build}
|
version: 1.0.{build}
|
||||||
|
|
||||||
image: Visual Studio 2017
|
image: Visual Studio 2017
|
||||||
|
|
||||||
|
environment:
|
||||||
|
JAVA_HOME: C:\Program Files\Java\jdk1.8.0
|
||||||
|
THIRDPARTY_HOME: $(APPVEYOR_BUILD_FOLDER)\thirdparty
|
||||||
|
SNAPPY_HOME: $(THIRDPARTY_HOME)\snappy-1.1.7
|
||||||
|
SNAPPY_INCLUDE: $(SNAPPY_HOME);$(SNAPPY_HOME)\build
|
||||||
|
SNAPPY_LIB_DEBUG: $(SNAPPY_HOME)\build\Debug\snappy.lib
|
||||||
|
SNAPPY_LIB_RELEASE: $(SNAPPY_HOME)\build\Release\snappy.lib
|
||||||
|
LZ4_HOME: $(THIRDPARTY_HOME)\lz4-1.8.3
|
||||||
|
LZ4_INCLUDE: $(LZ4_HOME)\lib
|
||||||
|
LZ4_LIB_DEBUG: $(LZ4_HOME)\visual\VS2010\bin\x64_Debug\liblz4_static.lib
|
||||||
|
LZ4_LIB_RELEASE: $(LZ4_HOME)\visual\VS2010\bin\x64_Release\liblz4_static.lib
|
||||||
|
ZSTD_HOME: $(THIRDPARTY_HOME)\zstd-1.4.0
|
||||||
|
ZSTD_INCLUDE: $(ZSTD_HOME)\lib;$(ZSTD_HOME)\lib\dictBuilder
|
||||||
|
ZSTD_LIB_DEBUG: $(ZSTD_HOME)\build\VS2010\bin\x64_Debug\libzstd_static.lib
|
||||||
|
ZSTD_LIB_RELEASE: $(ZSTD_HOME)\build\VS2010\bin\x64_Release\libzstd_static.lib
|
||||||
|
matrix:
|
||||||
|
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||||
|
CMAKE_GENERATOR: Visual Studio 14 Win64
|
||||||
|
DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio 14.0\Common7\IDE\devenv.com
|
||||||
|
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
|
||||||
|
CMAKE_GENERATOR: Visual Studio 15 Win64
|
||||||
|
DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\IDE\devenv.com
|
||||||
|
|
||||||
|
install:
|
||||||
|
- md %THIRDPARTY_HOME%
|
||||||
|
- echo "Building Snappy dependency..."
|
||||||
|
- cd %THIRDPARTY_HOME%
|
||||||
|
- curl --fail --silent --show-error --output snappy-1.1.7.zip --location https://github.com/google/snappy/archive/1.1.7.zip
|
||||||
|
- unzip snappy-1.1.7.zip
|
||||||
|
- cd snappy-1.1.7
|
||||||
|
- mkdir build
|
||||||
|
- cd build
|
||||||
|
- cmake -G "%CMAKE_GENERATOR%" ..
|
||||||
|
- msbuild Snappy.sln /p:Configuration=Debug /p:Platform=x64
|
||||||
|
- msbuild Snappy.sln /p:Configuration=Release /p:Platform=x64
|
||||||
|
- echo "Building LZ4 dependency..."
|
||||||
|
- cd %THIRDPARTY_HOME%
|
||||||
|
- curl --fail --silent --show-error --output lz4-1.8.3.zip --location https://github.com/lz4/lz4/archive/v1.8.3.zip
|
||||||
|
- unzip lz4-1.8.3.zip
|
||||||
|
- cd lz4-1.8.3\visual\VS2010
|
||||||
|
- ps: $CMD="$Env:DEV_ENV"; & $CMD lz4.sln /upgrade
|
||||||
|
- msbuild lz4.sln /p:Configuration=Debug /p:Platform=x64
|
||||||
|
- msbuild lz4.sln /p:Configuration=Release /p:Platform=x64
|
||||||
|
- echo "Building ZStd dependency..."
|
||||||
|
- cd %THIRDPARTY_HOME%
|
||||||
|
- curl --fail --silent --show-error --output zstd-1.4.0.zip --location https://github.com/facebook/zstd/archive/v1.4.0.zip
|
||||||
|
- unzip zstd-1.4.0.zip
|
||||||
|
- cd zstd-1.4.0\build\VS2010
|
||||||
|
- ps: $CMD="$Env:DEV_ENV"; & $CMD zstd.sln /upgrade
|
||||||
|
- msbuild zstd.sln /p:Configuration=Debug /p:Platform=x64
|
||||||
|
- msbuild zstd.sln /p:Configuration=Release /p:Platform=x64
|
||||||
|
|
||||||
before_build:
|
before_build:
|
||||||
- md %APPVEYOR_BUILD_FOLDER%\build
|
- md %APPVEYOR_BUILD_FOLDER%\build
|
||||||
- cd %APPVEYOR_BUILD_FOLDER%\build
|
- cd %APPVEYOR_BUILD_FOLDER%\build
|
||||||
- cmake -G "Visual Studio 15 Win64" -DOPTDBG=1 -DWITH_XPRESS=1 -DPORTABLE=1 ..
|
- cmake -G "%CMAKE_GENERATOR%" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DLZ4=1 -DZSTD=1 -DXPRESS=1 -DJNI=1 ..
|
||||||
- cd ..
|
- cd ..
|
||||||
|
|
||||||
build:
|
build:
|
||||||
project: build\rocksdb.sln
|
project: build\rocksdb.sln
|
||||||
parallel: true
|
parallel: true
|
||||||
verbosity: normal
|
verbosity: normal
|
||||||
test:
|
|
||||||
test_script:
|
test:
|
||||||
- ps: build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test2,db_test,env_basic_test,env_test -Concurrency 8
|
|
||||||
|
test_script:
|
||||||
|
- ps: build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test2,db_test,env_basic_test,env_test,db_merge_operand_test -Concurrency 8
|
||||||
|
|
||||||
|
on_failure:
|
||||||
|
- cmd: 7z a build-failed.zip %APPVEYOR_BUILD_FOLDER%\build\ && appveyor PushArtifact build-failed.zip
|
||||||
|
|
||||||
|
@ -71,13 +71,11 @@ is_opt_mode = build_mode.startswith("opt")
|
|||||||
if is_opt_mode:
|
if is_opt_mode:
|
||||||
rocksdb_compiler_flags.append("-DNDEBUG")
|
rocksdb_compiler_flags.append("-DNDEBUG")
|
||||||
|
|
||||||
default_allocator = read_config("fbcode", "default_allocator")
|
|
||||||
|
|
||||||
sanitizer = read_config("fbcode", "sanitizer")
|
sanitizer = read_config("fbcode", "sanitizer")
|
||||||
|
|
||||||
# Let RocksDB aware of jemalloc existence.
|
# Do not enable jemalloc if sanitizer presents. RocksDB will further detect
|
||||||
# Do not enable it if sanitizer presents.
|
# whether the binary is linked with jemalloc at runtime.
|
||||||
if is_opt_mode and default_allocator.startswith("jemalloc") and sanitizer == "":
|
if sanitizer == "":
|
||||||
rocksdb_compiler_flags.append("-DROCKSDB_JEMALLOC")
|
rocksdb_compiler_flags.append("-DROCKSDB_JEMALLOC")
|
||||||
rocksdb_external_deps.append(("jemalloc", None, "headers"))
|
rocksdb_external_deps.append(("jemalloc", None, "headers"))
|
||||||
"""
|
"""
|
||||||
|
@ -53,11 +53,13 @@ if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
|
|||||||
FBCODE_BUILD="true"
|
FBCODE_BUILD="true"
|
||||||
# If we're compiling with TSAN we need pic build
|
# If we're compiling with TSAN we need pic build
|
||||||
PIC_BUILD=$COMPILE_WITH_TSAN
|
PIC_BUILD=$COMPILE_WITH_TSAN
|
||||||
if [ -z "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
|
if [ -n "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
|
||||||
source "$PWD/build_tools/fbcode_config.sh"
|
|
||||||
else
|
|
||||||
# we need this to build with MySQL. Don't use for other purposes.
|
# we need this to build with MySQL. Don't use for other purposes.
|
||||||
source "$PWD/build_tools/fbcode_config4.8.1.sh"
|
source "$PWD/build_tools/fbcode_config4.8.1.sh"
|
||||||
|
elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_5xx" ]; then
|
||||||
|
source "$PWD/build_tools/fbcode_config.sh"
|
||||||
|
else
|
||||||
|
source "$PWD/build_tools/fbcode_config_platform007.sh"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -525,6 +527,8 @@ if test -z "$PORTABLE"; then
|
|||||||
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^arm`"; then
|
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^arm`"; then
|
||||||
# TODO: Handle this with approprite options.
|
# TODO: Handle this with approprite options.
|
||||||
COMMON_FLAGS="$COMMON_FLAGS"
|
COMMON_FLAGS="$COMMON_FLAGS"
|
||||||
|
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^aarch64`"; then
|
||||||
|
COMMON_FLAGS="$COMMON_FLAGS"
|
||||||
elif [ "$TARGET_OS" == "IOS" ]; then
|
elif [ "$TARGET_OS" == "IOS" ]; then
|
||||||
COMMON_FLAGS="$COMMON_FLAGS"
|
COMMON_FLAGS="$COMMON_FLAGS"
|
||||||
elif [ "$TARGET_OS" != "AIX" ] && [ "$TARGET_OS" != "SunOS" ]; then
|
elif [ "$TARGET_OS" != "AIX" ] && [ "$TARGET_OS" != "SunOS" ]; then
|
||||||
|
18
build_tools/dependencies_platform007.sh
Normal file
18
build_tools/dependencies_platform007.sh
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
GCC_BASE=/mnt/gvfs/third-party2/gcc/6e8e715624fd15256a7970073387793dfcf79b46/7.x/centos7-native/b2ef2b6
|
||||||
|
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/ef37e1faa1c29782abfac1ae65a291b9b7966f6d/stable/centos7-native/c9f9104
|
||||||
|
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/c67031f0f739ac61575a061518d6ef5038f99f90/7.x/platform007/5620abc
|
||||||
|
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/60d6f124a78798b73944f5ba87c2306ae3460153/2.26/platform007/f259413
|
||||||
|
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d
|
||||||
|
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/22c2d65676fb7c23cfa797c4f6937f38b026f3cf/1.2.8/platform007/ca4da3d
|
||||||
|
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d
|
||||||
|
LZ4_BASE=/mnt/gvfs/third-party2/lz4/907b498203d297947f3bb70b9466f47e100f1873/r131/platform007/ca4da3d
|
||||||
|
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/3ee276cbacfad3074e3f07bf826ac47f06970f4e/1.3.5/platform007/15a3614
|
||||||
|
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d
|
||||||
|
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/9c910d36d6235cc40e8ff559358f1833452300ca/master/platform007/5b0f53e
|
||||||
|
NUMA_BASE=/mnt/gvfs/third-party2/numa/9cbf2460284c669ed19c3ccb200a71f7dd7e53c7/2.0.11/platform007/ca4da3d
|
||||||
|
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/bf3d7497fe4e6d007354f0adffa16ce3003f8338/1.3/platform007/6f3e0a9
|
||||||
|
TBB_BASE=/mnt/gvfs/third-party2/tbb/ff4e0b093534704d8abab678a4fd7f5ea7b094c7/2018_U5/platform007/ca4da3d
|
||||||
|
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/b5c4a61a5c483ba24722005ae07895971a2ac707/fb/platform007/da39a3e
|
||||||
|
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/92ff90349e2f43ea0a8246d8b1cf17b6869013e3/2.29.1/centos7-native/da39a3e
|
||||||
|
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/f3f697a28122e6bcd513273dd9c1ff23852fc59f/3.13.0/platform007/ca4da3d
|
||||||
|
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832
|
157
build_tools/fbcode_config_platform007.sh
Normal file
157
build_tools/fbcode_config_platform007.sh
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# Set environment variables so that we can compile rocksdb using
|
||||||
|
# fbcode settings. It uses the latest g++ and clang compilers and also
|
||||||
|
# uses jemalloc
|
||||||
|
# Environment variables that change the behavior of this script:
|
||||||
|
# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included
|
||||||
|
|
||||||
|
|
||||||
|
BASEDIR=`dirname $BASH_SOURCE`
|
||||||
|
source "$BASEDIR/dependencies_platform007.sh"
|
||||||
|
|
||||||
|
CFLAGS=""
|
||||||
|
|
||||||
|
# libgcc
|
||||||
|
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0"
|
||||||
|
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
|
||||||
|
|
||||||
|
# glibc
|
||||||
|
GLIBC_INCLUDE="$GLIBC_BASE/include"
|
||||||
|
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
|
||||||
|
|
||||||
|
# snappy
|
||||||
|
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
|
||||||
|
if test -z $PIC_BUILD; then
|
||||||
|
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
|
||||||
|
else
|
||||||
|
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
|
||||||
|
fi
|
||||||
|
CFLAGS+=" -DSNAPPY"
|
||||||
|
|
||||||
|
if test -z $PIC_BUILD; then
|
||||||
|
# location of zlib headers and libraries
|
||||||
|
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
|
||||||
|
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
|
||||||
|
CFLAGS+=" -DZLIB"
|
||||||
|
|
||||||
|
# location of bzip headers and libraries
|
||||||
|
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
|
||||||
|
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
|
||||||
|
CFLAGS+=" -DBZIP2"
|
||||||
|
|
||||||
|
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
|
||||||
|
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
|
||||||
|
CFLAGS+=" -DLZ4"
|
||||||
|
fi
|
||||||
|
|
||||||
|
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
|
||||||
|
if test -z $PIC_BUILD; then
|
||||||
|
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
|
||||||
|
else
|
||||||
|
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
|
||||||
|
fi
|
||||||
|
CFLAGS+=" -DZSTD"
|
||||||
|
|
||||||
|
# location of gflags headers and libraries
|
||||||
|
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
|
||||||
|
if test -z $PIC_BUILD; then
|
||||||
|
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
|
||||||
|
else
|
||||||
|
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
|
||||||
|
fi
|
||||||
|
CFLAGS+=" -DGFLAGS=gflags"
|
||||||
|
|
||||||
|
# location of jemalloc
|
||||||
|
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
|
||||||
|
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a"
|
||||||
|
|
||||||
|
if test -z $PIC_BUILD; then
|
||||||
|
# location of numa
|
||||||
|
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
|
||||||
|
NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
|
||||||
|
CFLAGS+=" -DNUMA"
|
||||||
|
|
||||||
|
# location of libunwind
|
||||||
|
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# location of TBB
|
||||||
|
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
|
||||||
|
if test -z $PIC_BUILD; then
|
||||||
|
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
|
||||||
|
else
|
||||||
|
TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
|
||||||
|
fi
|
||||||
|
CFLAGS+=" -DTBB"
|
||||||
|
|
||||||
|
# use Intel SSE support for checksum calculations
|
||||||
|
export USE_SSE=1
|
||||||
|
export PORTABLE=1
|
||||||
|
|
||||||
|
BINUTILS="$BINUTILS_BASE/bin"
|
||||||
|
AR="$BINUTILS/ar"
|
||||||
|
|
||||||
|
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE"
|
||||||
|
|
||||||
|
STDLIBS="-L $GCC_BASE/lib64"
|
||||||
|
|
||||||
|
CLANG_BIN="$CLANG_BASE/bin"
|
||||||
|
CLANG_LIB="$CLANG_BASE/lib"
|
||||||
|
CLANG_SRC="$CLANG_BASE/../../src"
|
||||||
|
|
||||||
|
CLANG_ANALYZER="$CLANG_BIN/clang++"
|
||||||
|
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build"
|
||||||
|
|
||||||
|
if [ -z "$USE_CLANG" ]; then
|
||||||
|
# gcc
|
||||||
|
CC="$GCC_BASE/bin/gcc"
|
||||||
|
CXX="$GCC_BASE/bin/g++"
|
||||||
|
|
||||||
|
CFLAGS+=" -B$BINUTILS/gold"
|
||||||
|
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
|
||||||
|
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||||
|
JEMALLOC=1
|
||||||
|
else
|
||||||
|
# clang
|
||||||
|
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
|
||||||
|
CC="$CLANG_BIN/clang"
|
||||||
|
CXX="$CLANG_BIN/clang++"
|
||||||
|
|
||||||
|
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
|
||||||
|
|
||||||
|
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
|
||||||
|
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x "
|
||||||
|
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux "
|
||||||
|
CFLAGS+=" -isystem $GLIBC_INCLUDE"
|
||||||
|
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
|
||||||
|
CFLAGS+=" -isystem $CLANG_INCLUDE"
|
||||||
|
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
|
||||||
|
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
|
||||||
|
CFLAGS+=" -Wno-expansion-to-defined "
|
||||||
|
CXXFLAGS="-nostdinc++"
|
||||||
|
fi
|
||||||
|
|
||||||
|
CFLAGS+=" $DEPS_INCLUDE"
|
||||||
|
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42"
|
||||||
|
CXXFLAGS+=" $CFLAGS"
|
||||||
|
|
||||||
|
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
|
||||||
|
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
|
||||||
|
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so"
|
||||||
|
EXEC_LDFLAGS+=" $LIBUNWIND"
|
||||||
|
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib"
|
||||||
|
# required by libtbb
|
||||||
|
EXEC_LDFLAGS+=" -ldl"
|
||||||
|
|
||||||
|
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
|
||||||
|
|
||||||
|
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS"
|
||||||
|
|
||||||
|
VALGRIND_VER="$VALGRIND_BASE/bin/"
|
||||||
|
|
||||||
|
# lua not supported because it's on track for deprecation, I think
|
||||||
|
LUA_PATH=
|
||||||
|
LUA_LIB=
|
||||||
|
|
||||||
|
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
|
@ -377,7 +377,7 @@ function send_to_ods {
|
|||||||
echo >&2 "ERROR: Key $key doesn't have a value."
|
echo >&2 "ERROR: Key $key doesn't have a value."
|
||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
curl -s "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build$git_br&key=$key&value=$value" \
|
curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build$git_br&key=$key&value=$value" \
|
||||||
--connect-timeout 60
|
--connect-timeout 60
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -85,7 +85,9 @@ NON_SHM="TMPD=/tmp/rocksdb_test_tmp"
|
|||||||
GCC_481="ROCKSDB_FBCODE_BUILD_WITH_481=1"
|
GCC_481="ROCKSDB_FBCODE_BUILD_WITH_481=1"
|
||||||
ASAN="COMPILE_WITH_ASAN=1"
|
ASAN="COMPILE_WITH_ASAN=1"
|
||||||
CLANG="USE_CLANG=1"
|
CLANG="USE_CLANG=1"
|
||||||
TSAN="COMPILE_WITH_TSAN=1"
|
# in gcc-5 there are known problems with TSAN like https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71090.
|
||||||
|
# using platform007 gives us gcc-8 or higher which has that bug fixed.
|
||||||
|
TSAN="ROCKSDB_FBCODE_BUILD_WITH_PLATFORM007=1 COMPILE_WITH_TSAN=1"
|
||||||
UBSAN="COMPILE_WITH_UBSAN=1"
|
UBSAN="COMPILE_WITH_UBSAN=1"
|
||||||
TSAN_CRASH='CRASH_TEST_EXT_ARGS="--compression_type=zstd --log2_keys_per_lock=22"'
|
TSAN_CRASH='CRASH_TEST_EXT_ARGS="--compression_type=zstd --log2_keys_per_lock=22"'
|
||||||
NON_TSAN_CRASH="CRASH_TEST_EXT_ARGS=--compression_type=zstd"
|
NON_TSAN_CRASH="CRASH_TEST_EXT_ARGS=--compression_type=zstd"
|
||||||
@ -644,7 +646,7 @@ run_regression()
|
|||||||
|
|
||||||
# parameters: $1 -- key, $2 -- value
|
# parameters: $1 -- key, $2 -- value
|
||||||
function send_size_to_ods {
|
function send_size_to_ods {
|
||||||
curl -s "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=rocksdb.build_size.$1&value=$2" \
|
curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=rocksdb.build_size.$1&value=$2" \
|
||||||
--connect-timeout 60
|
--connect-timeout 60
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,6 +53,45 @@ function get_lib_base()
|
|||||||
log_variable $__res_var
|
log_variable $__res_var
|
||||||
}
|
}
|
||||||
|
|
||||||
|
###########################################################
|
||||||
|
# platform007 dependencies #
|
||||||
|
###########################################################
|
||||||
|
|
||||||
|
OUTPUT="$BASEDIR/dependencies_platform007.sh"
|
||||||
|
|
||||||
|
rm -f "$OUTPUT"
|
||||||
|
touch "$OUTPUT"
|
||||||
|
|
||||||
|
echo "Writing dependencies to $OUTPUT"
|
||||||
|
|
||||||
|
# Compilers locations
|
||||||
|
GCC_BASE=`readlink -f $TP2_LATEST/gcc/7.x/centos7-native/*/`
|
||||||
|
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
|
||||||
|
|
||||||
|
log_variable GCC_BASE
|
||||||
|
log_variable CLANG_BASE
|
||||||
|
|
||||||
|
# Libraries locations
|
||||||
|
get_lib_base libgcc 7.x platform007
|
||||||
|
get_lib_base glibc 2.26 platform007
|
||||||
|
get_lib_base snappy LATEST platform007
|
||||||
|
get_lib_base zlib LATEST platform007
|
||||||
|
get_lib_base bzip2 LATEST platform007
|
||||||
|
get_lib_base lz4 LATEST platform007
|
||||||
|
get_lib_base zstd LATEST platform007
|
||||||
|
get_lib_base gflags LATEST platform007
|
||||||
|
get_lib_base jemalloc LATEST platform007
|
||||||
|
get_lib_base numa LATEST platform007
|
||||||
|
get_lib_base libunwind LATEST platform007
|
||||||
|
get_lib_base tbb LATEST platform007
|
||||||
|
|
||||||
|
get_lib_base kernel-headers fb platform007
|
||||||
|
get_lib_base binutils LATEST centos7-native
|
||||||
|
get_lib_base valgrind LATEST platform007
|
||||||
|
get_lib_base lua 5.3.4 platform007
|
||||||
|
|
||||||
|
git diff $OUTPUT
|
||||||
|
|
||||||
###########################################################
|
###########################################################
|
||||||
# 5.x dependencies #
|
# 5.x dependencies #
|
||||||
###########################################################
|
###########################################################
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "db/event_helpers.h"
|
#include "db/event_helpers.h"
|
||||||
#include "db/internal_stats.h"
|
#include "db/internal_stats.h"
|
||||||
#include "db/merge_helper.h"
|
#include "db/merge_helper.h"
|
||||||
|
#include "db/range_del_aggregator.h"
|
||||||
#include "db/table_cache.h"
|
#include "db/table_cache.h"
|
||||||
#include "db/version_edit.h"
|
#include "db/version_edit.h"
|
||||||
#include "monitoring/iostats_context_imp.h"
|
#include "monitoring/iostats_context_imp.h"
|
||||||
@ -65,8 +66,9 @@ Status BuildTable(
|
|||||||
const std::string& dbname, Env* env, const ImmutableCFOptions& ioptions,
|
const std::string& dbname, Env* env, const ImmutableCFOptions& ioptions,
|
||||||
const MutableCFOptions& mutable_cf_options, const EnvOptions& env_options,
|
const MutableCFOptions& mutable_cf_options, const EnvOptions& env_options,
|
||||||
TableCache* table_cache, InternalIterator* iter,
|
TableCache* table_cache, InternalIterator* iter,
|
||||||
std::unique_ptr<InternalIterator> range_del_iter, FileMetaData* meta,
|
std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
|
||||||
const InternalKeyComparator& internal_comparator,
|
range_del_iters,
|
||||||
|
FileMetaData* meta, const InternalKeyComparator& internal_comparator,
|
||||||
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
|
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
|
||||||
int_tbl_prop_collector_factories,
|
int_tbl_prop_collector_factories,
|
||||||
uint32_t column_family_id, const std::string& column_family_name,
|
uint32_t column_family_id, const std::string& column_family_name,
|
||||||
@ -86,12 +88,10 @@ Status BuildTable(
|
|||||||
Status s;
|
Status s;
|
||||||
meta->fd.file_size = 0;
|
meta->fd.file_size = 0;
|
||||||
iter->SeekToFirst();
|
iter->SeekToFirst();
|
||||||
std::unique_ptr<RangeDelAggregator> range_del_agg(
|
std::unique_ptr<CompactionRangeDelAggregator> range_del_agg(
|
||||||
new RangeDelAggregator(internal_comparator, snapshots));
|
new CompactionRangeDelAggregator(&internal_comparator, snapshots));
|
||||||
s = range_del_agg->AddTombstones(std::move(range_del_iter));
|
for (auto& range_del_iter : range_del_iters) {
|
||||||
if (!s.ok()) {
|
range_del_agg->AddTombstones(std::move(range_del_iter));
|
||||||
// may be non-ok if a range tombstone key is unparsable
|
|
||||||
return s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string fname = TableFileName(ioptions.cf_paths, meta->fd.GetNumber(),
|
std::string fname = TableFileName(ioptions.cf_paths, meta->fd.GetNumber(),
|
||||||
@ -158,8 +158,10 @@ Status BuildTable(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto it = range_del_agg->NewIterator(); it->Valid(); it->Next()) {
|
auto range_del_it = range_del_agg->NewIterator();
|
||||||
auto tombstone = it->Tombstone();
|
for (range_del_it->SeekToFirst(); range_del_it->Valid();
|
||||||
|
range_del_it->Next()) {
|
||||||
|
auto tombstone = range_del_it->Tombstone();
|
||||||
auto kv = tombstone.Serialize();
|
auto kv = tombstone.Serialize();
|
||||||
builder->Add(kv.first.Encode(), kv.second);
|
builder->Add(kv.first.Encode(), kv.second);
|
||||||
meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(),
|
meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(),
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "db/range_tombstone_fragmenter.h"
|
||||||
#include "db/table_properties_collector.h"
|
#include "db/table_properties_collector.h"
|
||||||
#include "options/cf_options.h"
|
#include "options/cf_options.h"
|
||||||
#include "rocksdb/comparator.h"
|
#include "rocksdb/comparator.h"
|
||||||
@ -65,8 +66,9 @@ extern Status BuildTable(
|
|||||||
const std::string& dbname, Env* env, const ImmutableCFOptions& options,
|
const std::string& dbname, Env* env, const ImmutableCFOptions& options,
|
||||||
const MutableCFOptions& mutable_cf_options, const EnvOptions& env_options,
|
const MutableCFOptions& mutable_cf_options, const EnvOptions& env_options,
|
||||||
TableCache* table_cache, InternalIterator* iter,
|
TableCache* table_cache, InternalIterator* iter,
|
||||||
std::unique_ptr<InternalIterator> range_del_iter, FileMetaData* meta,
|
std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
|
||||||
const InternalKeyComparator& internal_comparator,
|
range_del_iters,
|
||||||
|
FileMetaData* meta, const InternalKeyComparator& internal_comparator,
|
||||||
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
|
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
|
||||||
int_tbl_prop_collector_factories,
|
int_tbl_prop_collector_factories,
|
||||||
uint32_t column_family_id, const std::string& column_family_name,
|
uint32_t column_family_id, const std::string& column_family_name,
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
#include "db/db_impl.h"
|
#include "db/db_impl.h"
|
||||||
#include "db/internal_stats.h"
|
#include "db/internal_stats.h"
|
||||||
#include "db/job_context.h"
|
#include "db/job_context.h"
|
||||||
#include "db/range_del_aggregator_v2.h"
|
#include "db/range_del_aggregator.h"
|
||||||
#include "db/table_properties_collector.h"
|
#include "db/table_properties_collector.h"
|
||||||
#include "db/version_set.h"
|
#include "db/version_set.h"
|
||||||
#include "db/write_controller.h"
|
#include "db/write_controller.h"
|
||||||
@ -945,7 +945,7 @@ Status ColumnFamilyData::RangesOverlapWithMemtables(
|
|||||||
ScopedArenaIterator memtable_iter(merge_iter_builder.Finish());
|
ScopedArenaIterator memtable_iter(merge_iter_builder.Finish());
|
||||||
|
|
||||||
auto read_seq = super_version->current->version_set()->LastSequence();
|
auto read_seq = super_version->current->version_set()->LastSequence();
|
||||||
RangeDelAggregatorV2 range_del_agg(&internal_comparator_, read_seq);
|
ReadRangeDelAggregator range_del_agg(&internal_comparator_, read_seq);
|
||||||
auto* active_range_del_iter =
|
auto* active_range_del_iter =
|
||||||
super_version->mem->NewRangeTombstoneIterator(read_opts, read_seq);
|
super_version->mem->NewRangeTombstoneIterator(read_opts, read_seq);
|
||||||
range_del_agg.AddTombstones(
|
range_del_agg.AddTombstones(
|
||||||
|
@ -67,10 +67,11 @@ class CompactedDBImpl : public DBImpl {
|
|||||||
virtual Status EnableFileDeletions(bool /*force*/) override {
|
virtual Status EnableFileDeletions(bool /*force*/) override {
|
||||||
return Status::NotSupported("Not supported in compacted db mode.");
|
return Status::NotSupported("Not supported in compacted db mode.");
|
||||||
}
|
}
|
||||||
virtual Status GetLiveFiles(std::vector<std::string>&,
|
virtual Status GetLiveFiles(std::vector<std::string>& ret,
|
||||||
uint64_t* /*manifest_file_size*/,
|
uint64_t* manifest_file_size,
|
||||||
bool /*flush_memtable*/ = true) override {
|
bool /*flush_memtable*/) override {
|
||||||
return Status::NotSupported("Not supported in compacted db mode.");
|
return DBImpl::GetLiveFiles(ret, manifest_file_size,
|
||||||
|
false /* flush_memtable */);
|
||||||
}
|
}
|
||||||
using DBImpl::Flush;
|
using DBImpl::Flush;
|
||||||
virtual Status Flush(const FlushOptions& /*options*/,
|
virtual Status Flush(const FlushOptions& /*options*/,
|
||||||
|
@ -18,7 +18,7 @@ CompactionIterator::CompactionIterator(
|
|||||||
SequenceNumber earliest_write_conflict_snapshot,
|
SequenceNumber earliest_write_conflict_snapshot,
|
||||||
const SnapshotChecker* snapshot_checker, Env* env,
|
const SnapshotChecker* snapshot_checker, Env* env,
|
||||||
bool report_detailed_time, bool expect_valid_internal_key,
|
bool report_detailed_time, bool expect_valid_internal_key,
|
||||||
RangeDelAggregator* range_del_agg, const Compaction* compaction,
|
CompactionRangeDelAggregator* range_del_agg, const Compaction* compaction,
|
||||||
const CompactionFilter* compaction_filter,
|
const CompactionFilter* compaction_filter,
|
||||||
const std::atomic<bool>* shutting_down,
|
const std::atomic<bool>* shutting_down,
|
||||||
const SequenceNumber preserve_deletes_seqnum)
|
const SequenceNumber preserve_deletes_seqnum)
|
||||||
@ -36,7 +36,7 @@ CompactionIterator::CompactionIterator(
|
|||||||
SequenceNumber earliest_write_conflict_snapshot,
|
SequenceNumber earliest_write_conflict_snapshot,
|
||||||
const SnapshotChecker* snapshot_checker, Env* env,
|
const SnapshotChecker* snapshot_checker, Env* env,
|
||||||
bool report_detailed_time, bool expect_valid_internal_key,
|
bool report_detailed_time, bool expect_valid_internal_key,
|
||||||
RangeDelAggregator* range_del_agg,
|
CompactionRangeDelAggregator* range_del_agg,
|
||||||
std::unique_ptr<CompactionProxy> compaction,
|
std::unique_ptr<CompactionProxy> compaction,
|
||||||
const CompactionFilter* compaction_filter,
|
const CompactionFilter* compaction_filter,
|
||||||
const std::atomic<bool>* shutting_down,
|
const std::atomic<bool>* shutting_down,
|
||||||
|
@ -64,7 +64,7 @@ class CompactionIterator {
|
|||||||
SequenceNumber earliest_write_conflict_snapshot,
|
SequenceNumber earliest_write_conflict_snapshot,
|
||||||
const SnapshotChecker* snapshot_checker, Env* env,
|
const SnapshotChecker* snapshot_checker, Env* env,
|
||||||
bool report_detailed_time, bool expect_valid_internal_key,
|
bool report_detailed_time, bool expect_valid_internal_key,
|
||||||
RangeDelAggregator* range_del_agg,
|
CompactionRangeDelAggregator* range_del_agg,
|
||||||
const Compaction* compaction = nullptr,
|
const Compaction* compaction = nullptr,
|
||||||
const CompactionFilter* compaction_filter = nullptr,
|
const CompactionFilter* compaction_filter = nullptr,
|
||||||
const std::atomic<bool>* shutting_down = nullptr,
|
const std::atomic<bool>* shutting_down = nullptr,
|
||||||
@ -77,7 +77,7 @@ class CompactionIterator {
|
|||||||
SequenceNumber earliest_write_conflict_snapshot,
|
SequenceNumber earliest_write_conflict_snapshot,
|
||||||
const SnapshotChecker* snapshot_checker, Env* env,
|
const SnapshotChecker* snapshot_checker, Env* env,
|
||||||
bool report_detailed_time, bool expect_valid_internal_key,
|
bool report_detailed_time, bool expect_valid_internal_key,
|
||||||
RangeDelAggregator* range_del_agg,
|
CompactionRangeDelAggregator* range_del_agg,
|
||||||
std::unique_ptr<CompactionProxy> compaction,
|
std::unique_ptr<CompactionProxy> compaction,
|
||||||
const CompactionFilter* compaction_filter = nullptr,
|
const CompactionFilter* compaction_filter = nullptr,
|
||||||
const std::atomic<bool>* shutting_down = nullptr,
|
const std::atomic<bool>* shutting_down = nullptr,
|
||||||
@ -141,7 +141,7 @@ class CompactionIterator {
|
|||||||
Env* env_;
|
Env* env_;
|
||||||
bool report_detailed_time_;
|
bool report_detailed_time_;
|
||||||
bool expect_valid_internal_key_;
|
bool expect_valid_internal_key_;
|
||||||
RangeDelAggregator* range_del_agg_;
|
CompactionRangeDelAggregator* range_del_agg_;
|
||||||
std::unique_ptr<CompactionProxy> compaction_;
|
std::unique_ptr<CompactionProxy> compaction_;
|
||||||
const CompactionFilter* compaction_filter_;
|
const CompactionFilter* compaction_filter_;
|
||||||
const std::atomic<bool>* shutting_down_;
|
const std::atomic<bool>* shutting_down_;
|
||||||
|
@ -221,10 +221,15 @@ class CompactionIteratorTest : public testing::TestWithParam<bool> {
|
|||||||
MergeOperator* merge_op = nullptr, CompactionFilter* filter = nullptr,
|
MergeOperator* merge_op = nullptr, CompactionFilter* filter = nullptr,
|
||||||
bool bottommost_level = false,
|
bool bottommost_level = false,
|
||||||
SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber) {
|
SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber) {
|
||||||
std::unique_ptr<InternalIterator> range_del_iter(
|
std::unique_ptr<InternalIterator> unfragmented_range_del_iter(
|
||||||
new test::VectorIterator(range_del_ks, range_del_vs));
|
new test::VectorIterator(range_del_ks, range_del_vs));
|
||||||
range_del_agg_.reset(new RangeDelAggregator(icmp_, snapshots_));
|
auto tombstone_list = std::make_shared<FragmentedRangeTombstoneList>(
|
||||||
ASSERT_OK(range_del_agg_->AddTombstones(std::move(range_del_iter)));
|
std::move(unfragmented_range_del_iter), icmp_);
|
||||||
|
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
|
||||||
|
new FragmentedRangeTombstoneIterator(tombstone_list, icmp_,
|
||||||
|
kMaxSequenceNumber));
|
||||||
|
range_del_agg_.reset(new CompactionRangeDelAggregator(&icmp_, snapshots_));
|
||||||
|
range_del_agg_->AddTombstones(std::move(range_del_iter));
|
||||||
|
|
||||||
std::unique_ptr<CompactionIterator::CompactionProxy> compaction;
|
std::unique_ptr<CompactionIterator::CompactionProxy> compaction;
|
||||||
if (filter || bottommost_level) {
|
if (filter || bottommost_level) {
|
||||||
@ -292,7 +297,7 @@ class CompactionIteratorTest : public testing::TestWithParam<bool> {
|
|||||||
std::unique_ptr<MergeHelper> merge_helper_;
|
std::unique_ptr<MergeHelper> merge_helper_;
|
||||||
std::unique_ptr<LoggingForwardVectorIterator> iter_;
|
std::unique_ptr<LoggingForwardVectorIterator> iter_;
|
||||||
std::unique_ptr<CompactionIterator> c_iter_;
|
std::unique_ptr<CompactionIterator> c_iter_;
|
||||||
std::unique_ptr<RangeDelAggregator> range_del_agg_;
|
std::unique_ptr<CompactionRangeDelAggregator> range_del_agg_;
|
||||||
std::unique_ptr<SnapshotChecker> snapshot_checker_;
|
std::unique_ptr<SnapshotChecker> snapshot_checker_;
|
||||||
std::atomic<bool> shutting_down_{false};
|
std::atomic<bool> shutting_down_{false};
|
||||||
FakeCompaction* compaction_proxy_;
|
FakeCompaction* compaction_proxy_;
|
||||||
|
@ -36,7 +36,7 @@
|
|||||||
#include "db/memtable_list.h"
|
#include "db/memtable_list.h"
|
||||||
#include "db/merge_context.h"
|
#include "db/merge_context.h"
|
||||||
#include "db/merge_helper.h"
|
#include "db/merge_helper.h"
|
||||||
#include "db/range_del_aggregator_v2.h"
|
#include "db/range_del_aggregator.h"
|
||||||
#include "db/version_set.h"
|
#include "db/version_set.h"
|
||||||
#include "monitoring/iostats_context_imp.h"
|
#include "monitoring/iostats_context_imp.h"
|
||||||
#include "monitoring/perf_context_imp.h"
|
#include "monitoring/perf_context_imp.h"
|
||||||
@ -805,15 +805,13 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) {
|
|||||||
void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
||||||
assert(sub_compact != nullptr);
|
assert(sub_compact != nullptr);
|
||||||
ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
|
ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
|
||||||
RangeDelAggregatorV2 range_del_agg_v2(&cfd->internal_comparator(),
|
CompactionRangeDelAggregator range_del_agg(&cfd->internal_comparator(),
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
existing_snapshots_);
|
||||||
auto* range_del_agg =
|
|
||||||
range_del_agg_v2.DelegateToRangeDelAggregator(existing_snapshots_);
|
|
||||||
|
|
||||||
// Although the v2 aggregator is what the level iterator(s) know about,
|
// Although the v2 aggregator is what the level iterator(s) know about,
|
||||||
// the AddTombstones calls will be propagated down to the v1 aggregator.
|
// the AddTombstones calls will be propagated down to the v1 aggregator.
|
||||||
std::unique_ptr<InternalIterator> input(versions_->MakeInputIterator(
|
std::unique_ptr<InternalIterator> input(versions_->MakeInputIterator(
|
||||||
sub_compact->compaction, &range_del_agg_v2, env_optiosn_for_read_));
|
sub_compact->compaction, &range_del_agg, env_optiosn_for_read_));
|
||||||
|
|
||||||
AutoThreadOperationStageUpdater stage_updater(
|
AutoThreadOperationStageUpdater stage_updater(
|
||||||
ThreadStatus::STAGE_COMPACTION_PROCESS_KV);
|
ThreadStatus::STAGE_COMPACTION_PROCESS_KV);
|
||||||
@ -902,8 +900,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|||||||
input.get(), cfd->user_comparator(), &merge, versions_->LastSequence(),
|
input.get(), cfd->user_comparator(), &merge, versions_->LastSequence(),
|
||||||
&existing_snapshots_, earliest_write_conflict_snapshot_,
|
&existing_snapshots_, earliest_write_conflict_snapshot_,
|
||||||
snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_), false,
|
snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_), false,
|
||||||
range_del_agg, sub_compact->compaction, compaction_filter, shutting_down_,
|
&range_del_agg, sub_compact->compaction, compaction_filter,
|
||||||
preserve_deletes_seqnum_));
|
shutting_down_, preserve_deletes_seqnum_));
|
||||||
auto c_iter = sub_compact->c_iter.get();
|
auto c_iter = sub_compact->c_iter.get();
|
||||||
c_iter->SeekToFirst();
|
c_iter->SeekToFirst();
|
||||||
if (c_iter->Valid() && sub_compact->compaction->output_level() != 0) {
|
if (c_iter->Valid() && sub_compact->compaction->output_level() != 0) {
|
||||||
@ -1041,7 +1039,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|||||||
}
|
}
|
||||||
CompactionIterationStats range_del_out_stats;
|
CompactionIterationStats range_del_out_stats;
|
||||||
status =
|
status =
|
||||||
FinishCompactionOutputFile(input_status, sub_compact, range_del_agg,
|
FinishCompactionOutputFile(input_status, sub_compact, &range_del_agg,
|
||||||
&range_del_out_stats, next_key);
|
&range_del_out_stats, next_key);
|
||||||
RecordDroppedKeys(range_del_out_stats,
|
RecordDroppedKeys(range_del_out_stats,
|
||||||
&sub_compact->compaction_job_stats);
|
&sub_compact->compaction_job_stats);
|
||||||
@ -1092,8 +1090,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (status.ok() && sub_compact->builder == nullptr &&
|
if (status.ok() && sub_compact->builder == nullptr &&
|
||||||
sub_compact->outputs.size() == 0 &&
|
sub_compact->outputs.size() == 0 && !range_del_agg.IsEmpty()) {
|
||||||
!range_del_agg->IsEmpty()) {
|
|
||||||
// handle subcompaction containing only range deletions
|
// handle subcompaction containing only range deletions
|
||||||
status = OpenCompactionOutputFile(sub_compact);
|
status = OpenCompactionOutputFile(sub_compact);
|
||||||
}
|
}
|
||||||
@ -1102,7 +1099,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|||||||
// close the output file.
|
// close the output file.
|
||||||
if (sub_compact->builder != nullptr) {
|
if (sub_compact->builder != nullptr) {
|
||||||
CompactionIterationStats range_del_out_stats;
|
CompactionIterationStats range_del_out_stats;
|
||||||
Status s = FinishCompactionOutputFile(status, sub_compact, range_del_agg,
|
Status s = FinishCompactionOutputFile(status, sub_compact, &range_del_agg,
|
||||||
&range_del_out_stats);
|
&range_del_out_stats);
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
status = s;
|
status = s;
|
||||||
@ -1168,7 +1165,7 @@ void CompactionJob::RecordDroppedKeys(
|
|||||||
|
|
||||||
Status CompactionJob::FinishCompactionOutputFile(
|
Status CompactionJob::FinishCompactionOutputFile(
|
||||||
const Status& input_status, SubcompactionState* sub_compact,
|
const Status& input_status, SubcompactionState* sub_compact,
|
||||||
RangeDelAggregator* range_del_agg,
|
CompactionRangeDelAggregator* range_del_agg,
|
||||||
CompactionIterationStats* range_del_out_stats,
|
CompactionIterationStats* range_del_out_stats,
|
||||||
const Slice* next_table_min_key /* = nullptr */) {
|
const Slice* next_table_min_key /* = nullptr */) {
|
||||||
AutoThreadOperationStageUpdater stage_updater(
|
AutoThreadOperationStageUpdater stage_updater(
|
||||||
@ -1207,10 +1204,19 @@ Status CompactionJob::FinishCompactionOutputFile(
|
|||||||
lower_bound = nullptr;
|
lower_bound = nullptr;
|
||||||
}
|
}
|
||||||
if (next_table_min_key != nullptr) {
|
if (next_table_min_key != nullptr) {
|
||||||
// This isn't the last file in the subcompaction, so extend until the next
|
// This may be the last file in the subcompaction in some cases, so we
|
||||||
// file starts.
|
// need to compare the end key of subcompaction with the next file start
|
||||||
|
// key. When the end key is chosen by the subcompaction, we know that
|
||||||
|
// it must be the biggest key in output file. Therefore, it is safe to
|
||||||
|
// use the smaller key as the upper bound of the output file, to ensure
|
||||||
|
// that there is no overlapping between different output files.
|
||||||
upper_bound_guard = ExtractUserKey(*next_table_min_key);
|
upper_bound_guard = ExtractUserKey(*next_table_min_key);
|
||||||
|
if (sub_compact->end != nullptr &&
|
||||||
|
ucmp->Compare(upper_bound_guard, *sub_compact->end) >= 0) {
|
||||||
|
upper_bound = sub_compact->end;
|
||||||
|
} else {
|
||||||
upper_bound = &upper_bound_guard;
|
upper_bound = &upper_bound_guard;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// This is the last file in the subcompaction, so extend until the
|
// This is the last file in the subcompaction, so extend until the
|
||||||
// subcompaction ends.
|
// subcompaction ends.
|
||||||
@ -1220,11 +1226,6 @@ Status CompactionJob::FinishCompactionOutputFile(
|
|||||||
if (existing_snapshots_.size() > 0) {
|
if (existing_snapshots_.size() > 0) {
|
||||||
earliest_snapshot = existing_snapshots_[0];
|
earliest_snapshot = existing_snapshots_[0];
|
||||||
}
|
}
|
||||||
auto it = range_del_agg->NewIterator();
|
|
||||||
if (lower_bound != nullptr) {
|
|
||||||
it->Seek(*lower_bound);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool has_overlapping_endpoints;
|
bool has_overlapping_endpoints;
|
||||||
if (upper_bound != nullptr && meta->largest.size() > 0) {
|
if (upper_bound != nullptr && meta->largest.size() > 0) {
|
||||||
has_overlapping_endpoints =
|
has_overlapping_endpoints =
|
||||||
@ -1232,6 +1233,24 @@ Status CompactionJob::FinishCompactionOutputFile(
|
|||||||
} else {
|
} else {
|
||||||
has_overlapping_endpoints = false;
|
has_overlapping_endpoints = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The end key of the subcompaction must be bigger or equal to the upper
|
||||||
|
// bound. If the end of subcompaction is null or the upper bound is null,
|
||||||
|
// it means that this file is the last file in the compaction. So there
|
||||||
|
// will be no overlapping between this file and others.
|
||||||
|
assert(sub_compact->end == nullptr ||
|
||||||
|
upper_bound == nullptr ||
|
||||||
|
ucmp->Compare(*upper_bound , *sub_compact->end) <= 0);
|
||||||
|
auto it = range_del_agg->NewIterator(lower_bound, upper_bound,
|
||||||
|
has_overlapping_endpoints);
|
||||||
|
// Position the range tombstone output iterator. There may be tombstone
|
||||||
|
// fragments that are entirely out of range, so make sure that we do not
|
||||||
|
// include those.
|
||||||
|
if (lower_bound != nullptr) {
|
||||||
|
it->Seek(*lower_bound);
|
||||||
|
} else {
|
||||||
|
it->SeekToFirst();
|
||||||
|
}
|
||||||
for (; it->Valid(); it->Next()) {
|
for (; it->Valid(); it->Next()) {
|
||||||
auto tombstone = it->Tombstone();
|
auto tombstone = it->Tombstone();
|
||||||
if (upper_bound != nullptr) {
|
if (upper_bound != nullptr) {
|
||||||
@ -1257,6 +1276,8 @@ Status CompactionJob::FinishCompactionOutputFile(
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto kv = tombstone.Serialize();
|
auto kv = tombstone.Serialize();
|
||||||
|
assert(lower_bound == nullptr ||
|
||||||
|
ucmp->Compare(*lower_bound, kv.second) < 0);
|
||||||
sub_compact->builder->Add(kv.first.Encode(), kv.second);
|
sub_compact->builder->Add(kv.first.Encode(), kv.second);
|
||||||
InternalKey smallest_candidate = std::move(kv.first);
|
InternalKey smallest_candidate = std::move(kv.first);
|
||||||
if (lower_bound != nullptr &&
|
if (lower_bound != nullptr &&
|
||||||
|
@ -29,8 +29,8 @@
|
|||||||
#include "db/version_edit.h"
|
#include "db/version_edit.h"
|
||||||
#include "db/write_controller.h"
|
#include "db/write_controller.h"
|
||||||
#include "db/write_thread.h"
|
#include "db/write_thread.h"
|
||||||
#include "options/db_options.h"
|
|
||||||
#include "options/cf_options.h"
|
#include "options/cf_options.h"
|
||||||
|
#include "options/db_options.h"
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
#include "rocksdb/compaction_filter.h"
|
#include "rocksdb/compaction_filter.h"
|
||||||
#include "rocksdb/compaction_job_stats.h"
|
#include "rocksdb/compaction_job_stats.h"
|
||||||
@ -104,7 +104,7 @@ class CompactionJob {
|
|||||||
|
|
||||||
Status FinishCompactionOutputFile(
|
Status FinishCompactionOutputFile(
|
||||||
const Status& input_status, SubcompactionState* sub_compact,
|
const Status& input_status, SubcompactionState* sub_compact,
|
||||||
RangeDelAggregator* range_del_agg,
|
CompactionRangeDelAggregator* range_del_agg,
|
||||||
CompactionIterationStats* range_del_out_stats,
|
CompactionIterationStats* range_del_out_stats,
|
||||||
const Slice* next_table_min_key = nullptr);
|
const Slice* next_table_min_key = nullptr);
|
||||||
Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options);
|
Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options);
|
||||||
|
@ -340,7 +340,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) {
|
|||||||
Arena arena;
|
Arena arena;
|
||||||
{
|
{
|
||||||
InternalKeyComparator icmp(options.comparator);
|
InternalKeyComparator icmp(options.comparator);
|
||||||
RangeDelAggregatorV2 range_del_agg(&icmp,
|
ReadRangeDelAggregator range_del_agg(&icmp,
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
ScopedArenaIterator iter(dbfull()->NewInternalIterator(
|
ScopedArenaIterator iter(dbfull()->NewInternalIterator(
|
||||||
&arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
|
&arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
|
||||||
@ -430,7 +430,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilter) {
|
|||||||
count = 0;
|
count = 0;
|
||||||
{
|
{
|
||||||
InternalKeyComparator icmp(options.comparator);
|
InternalKeyComparator icmp(options.comparator);
|
||||||
RangeDelAggregatorV2 range_del_agg(&icmp,
|
ReadRangeDelAggregator range_del_agg(&icmp,
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
ScopedArenaIterator iter(dbfull()->NewInternalIterator(
|
ScopedArenaIterator iter(dbfull()->NewInternalIterator(
|
||||||
&arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
|
&arena, &range_del_agg, kMaxSequenceNumber, handles_[1]));
|
||||||
@ -648,7 +648,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) {
|
|||||||
int total = 0;
|
int total = 0;
|
||||||
Arena arena;
|
Arena arena;
|
||||||
InternalKeyComparator icmp(options.comparator);
|
InternalKeyComparator icmp(options.comparator);
|
||||||
RangeDelAggregatorV2 range_del_agg(&icmp,
|
ReadRangeDelAggregator range_del_agg(&icmp,
|
||||||
kMaxSequenceNumber /* snapshots */);
|
kMaxSequenceNumber /* snapshots */);
|
||||||
ScopedArenaIterator iter(dbfull()->NewInternalIterator(
|
ScopedArenaIterator iter(dbfull()->NewInternalIterator(
|
||||||
&arena, &range_del_agg, kMaxSequenceNumber));
|
&arena, &range_del_agg, kMaxSequenceNumber));
|
||||||
|
@ -407,6 +407,87 @@ TEST_P(DBAtomicFlushTest, AtomicFlushRollbackSomeJobs) {
|
|||||||
Destroy(options);
|
Destroy(options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(DBAtomicFlushTest, FlushMultipleCFs_DropSomeBeforeRequestFlush) {
|
||||||
|
bool atomic_flush = GetParam();
|
||||||
|
if (!atomic_flush) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.create_if_missing = true;
|
||||||
|
options.atomic_flush = atomic_flush;
|
||||||
|
SyncPoint::GetInstance()->DisableProcessing();
|
||||||
|
SyncPoint::GetInstance()->ClearAllCallBacks();
|
||||||
|
SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
|
||||||
|
CreateAndReopenWithCF({"pikachu", "eevee"}, options);
|
||||||
|
size_t num_cfs = handles_.size();
|
||||||
|
ASSERT_EQ(3, num_cfs);
|
||||||
|
WriteOptions wopts;
|
||||||
|
wopts.disableWAL = true;
|
||||||
|
std::vector<int> cf_ids;
|
||||||
|
for (size_t i = 0; i != num_cfs; ++i) {
|
||||||
|
int cf_id = static_cast<int>(i);
|
||||||
|
ASSERT_OK(Put(cf_id, "key", "value", wopts));
|
||||||
|
cf_ids.push_back(cf_id);
|
||||||
|
}
|
||||||
|
ASSERT_OK(dbfull()->DropColumnFamily(handles_[1]));
|
||||||
|
ASSERT_TRUE(Flush(cf_ids).IsShutdownInProgress());
|
||||||
|
Destroy(options);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(DBAtomicFlushTest,
|
||||||
|
FlushMultipleCFs_DropSomeAfterScheduleFlushBeforeFlushJobRun) {
|
||||||
|
bool atomic_flush = GetParam();
|
||||||
|
if (!atomic_flush) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.create_if_missing = true;
|
||||||
|
options.atomic_flush = atomic_flush;
|
||||||
|
|
||||||
|
CreateAndReopenWithCF({"pikachu", "eevee"}, options);
|
||||||
|
|
||||||
|
SyncPoint::GetInstance()->DisableProcessing();
|
||||||
|
SyncPoint::GetInstance()->ClearAllCallBacks();
|
||||||
|
SyncPoint::GetInstance()->LoadDependency(
|
||||||
|
{{"DBImpl::AtomicFlushMemTables:AfterScheduleFlush",
|
||||||
|
"DBAtomicFlushTest::BeforeDropCF"},
|
||||||
|
{"DBAtomicFlushTest::AfterDropCF",
|
||||||
|
"DBImpl::BackgroundCallFlush:start"}});
|
||||||
|
SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
|
||||||
|
size_t num_cfs = handles_.size();
|
||||||
|
ASSERT_EQ(3, num_cfs);
|
||||||
|
WriteOptions wopts;
|
||||||
|
wopts.disableWAL = true;
|
||||||
|
for (size_t i = 0; i != num_cfs; ++i) {
|
||||||
|
int cf_id = static_cast<int>(i);
|
||||||
|
ASSERT_OK(Put(cf_id, "key", "value", wopts));
|
||||||
|
}
|
||||||
|
port::Thread user_thread([&]() {
|
||||||
|
TEST_SYNC_POINT("DBAtomicFlushTest::BeforeDropCF");
|
||||||
|
ASSERT_OK(dbfull()->DropColumnFamily(handles_[1]));
|
||||||
|
TEST_SYNC_POINT("DBAtomicFlushTest::AfterDropCF");
|
||||||
|
});
|
||||||
|
FlushOptions flush_opts;
|
||||||
|
flush_opts.wait = true;
|
||||||
|
ASSERT_OK(dbfull()->Flush(flush_opts, handles_));
|
||||||
|
user_thread.join();
|
||||||
|
for (size_t i = 0; i != num_cfs; ++i) {
|
||||||
|
int cf_id = static_cast<int>(i);
|
||||||
|
ASSERT_EQ("value", Get(cf_id, "key"));
|
||||||
|
}
|
||||||
|
|
||||||
|
ReopenWithColumnFamilies({kDefaultColumnFamilyName, "eevee"}, options);
|
||||||
|
num_cfs = handles_.size();
|
||||||
|
ASSERT_EQ(2, num_cfs);
|
||||||
|
for (size_t i = 0; i != num_cfs; ++i) {
|
||||||
|
int cf_id = static_cast<int>(i);
|
||||||
|
ASSERT_EQ("value", Get(cf_id, "key"));
|
||||||
|
}
|
||||||
|
Destroy(options);
|
||||||
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(DBFlushDirectIOTest, DBFlushDirectIOTest,
|
INSTANTIATE_TEST_CASE_P(DBFlushDirectIOTest, DBFlushDirectIOTest,
|
||||||
testing::Bool());
|
testing::Bool());
|
||||||
|
|
||||||
|
@ -45,7 +45,6 @@
|
|||||||
#include "db/memtable_list.h"
|
#include "db/memtable_list.h"
|
||||||
#include "db/merge_context.h"
|
#include "db/merge_context.h"
|
||||||
#include "db/merge_helper.h"
|
#include "db/merge_helper.h"
|
||||||
#include "db/range_del_aggregator.h"
|
|
||||||
#include "db/range_tombstone_fragmenter.h"
|
#include "db/range_tombstone_fragmenter.h"
|
||||||
#include "db/table_cache.h"
|
#include "db/table_cache.h"
|
||||||
#include "db/table_properties_collector.h"
|
#include "db/table_properties_collector.h"
|
||||||
@ -221,7 +220,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
|
|||||||
preserve_deletes_(options.preserve_deletes),
|
preserve_deletes_(options.preserve_deletes),
|
||||||
closed_(false),
|
closed_(false),
|
||||||
error_handler_(this, immutable_db_options_, &mutex_),
|
error_handler_(this, immutable_db_options_, &mutex_),
|
||||||
atomic_flush_commit_in_progress_(false) {
|
atomic_flush_install_cv_(&mutex_) {
|
||||||
// !batch_per_trx_ implies seq_per_batch_ because it is only unset for
|
// !batch_per_trx_ implies seq_per_batch_ because it is only unset for
|
||||||
// WriteUnprepared, which should use seq_per_batch_.
|
// WriteUnprepared, which should use seq_per_batch_.
|
||||||
assert(batch_per_txn_ || seq_per_batch_);
|
assert(batch_per_txn_ || seq_per_batch_);
|
||||||
@ -1033,7 +1032,7 @@ bool DBImpl::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
InternalIterator* DBImpl::NewInternalIterator(
|
InternalIterator* DBImpl::NewInternalIterator(
|
||||||
Arena* arena, RangeDelAggregatorV2* range_del_agg, SequenceNumber sequence,
|
Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence,
|
||||||
ColumnFamilyHandle* column_family) {
|
ColumnFamilyHandle* column_family) {
|
||||||
ColumnFamilyData* cfd;
|
ColumnFamilyData* cfd;
|
||||||
if (column_family == nullptr) {
|
if (column_family == nullptr) {
|
||||||
@ -1150,10 +1149,12 @@ static void CleanupIteratorState(void* arg1, void* /*arg2*/) {
|
|||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
InternalIterator* DBImpl::NewInternalIterator(
|
InternalIterator* DBImpl::NewInternalIterator(const ReadOptions& read_options,
|
||||||
const ReadOptions& read_options, ColumnFamilyData* cfd,
|
ColumnFamilyData* cfd,
|
||||||
SuperVersion* super_version, Arena* arena,
|
SuperVersion* super_version,
|
||||||
RangeDelAggregatorV2* range_del_agg, SequenceNumber sequence) {
|
Arena* arena,
|
||||||
|
RangeDelAggregator* range_del_agg,
|
||||||
|
SequenceNumber sequence) {
|
||||||
InternalIterator* internal_iter;
|
InternalIterator* internal_iter;
|
||||||
assert(arena != nullptr);
|
assert(arena != nullptr);
|
||||||
assert(range_del_agg != nullptr);
|
assert(range_del_agg != nullptr);
|
||||||
|
34
db/db_impl.h
34
db/db_impl.h
@ -31,7 +31,7 @@
|
|||||||
#include "db/log_writer.h"
|
#include "db/log_writer.h"
|
||||||
#include "db/logs_with_prep_tracker.h"
|
#include "db/logs_with_prep_tracker.h"
|
||||||
#include "db/pre_release_callback.h"
|
#include "db/pre_release_callback.h"
|
||||||
#include "db/range_del_aggregator_v2.h"
|
#include "db/range_del_aggregator.h"
|
||||||
#include "db/read_callback.h"
|
#include "db/read_callback.h"
|
||||||
#include "db/snapshot_checker.h"
|
#include "db/snapshot_checker.h"
|
||||||
#include "db/snapshot_impl.h"
|
#include "db/snapshot_impl.h"
|
||||||
@ -374,8 +374,8 @@ class DBImpl : public DB {
|
|||||||
// The keys of this iterator are internal keys (see format.h).
|
// The keys of this iterator are internal keys (see format.h).
|
||||||
// The returned iterator should be deleted when no longer needed.
|
// The returned iterator should be deleted when no longer needed.
|
||||||
InternalIterator* NewInternalIterator(
|
InternalIterator* NewInternalIterator(
|
||||||
Arena* arena, RangeDelAggregatorV2* range_del_agg,
|
Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence,
|
||||||
SequenceNumber sequence, ColumnFamilyHandle* column_family = nullptr);
|
ColumnFamilyHandle* column_family = nullptr);
|
||||||
|
|
||||||
LogsWithPrepTracker* logs_with_prep_tracker() {
|
LogsWithPrepTracker* logs_with_prep_tracker() {
|
||||||
return &logs_with_prep_tracker_;
|
return &logs_with_prep_tracker_;
|
||||||
@ -578,12 +578,9 @@ class DBImpl : public DB {
|
|||||||
|
|
||||||
const WriteController& write_controller() { return write_controller_; }
|
const WriteController& write_controller() { return write_controller_; }
|
||||||
|
|
||||||
InternalIterator* NewInternalIterator(const ReadOptions&,
|
InternalIterator* NewInternalIterator(
|
||||||
ColumnFamilyData* cfd,
|
const ReadOptions&, ColumnFamilyData* cfd, SuperVersion* super_version,
|
||||||
SuperVersion* super_version,
|
Arena* arena, RangeDelAggregator* range_del_agg, SequenceNumber sequence);
|
||||||
Arena* arena,
|
|
||||||
RangeDelAggregatorV2* range_del_agg,
|
|
||||||
SequenceNumber sequence);
|
|
||||||
|
|
||||||
// hollow transactions shell used for recovery.
|
// hollow transactions shell used for recovery.
|
||||||
// these will then be passed to TransactionDB so that
|
// these will then be passed to TransactionDB so that
|
||||||
@ -1613,15 +1610,16 @@ class DBImpl : public DB {
|
|||||||
|
|
||||||
ErrorHandler error_handler_;
|
ErrorHandler error_handler_;
|
||||||
|
|
||||||
// True if the DB is committing atomic flush.
|
// Conditional variable to coordinate installation of atomic flush results.
|
||||||
// TODO (yanqin) the current impl assumes that the entire DB belongs to
|
// With atomic flush, each bg thread installs the result of flushing multiple
|
||||||
// a single atomic flush group. In the future we need to add a new class
|
// column families, and different threads can flush different column
|
||||||
// (struct) similar to the following to make it more general.
|
// families. It's difficult to rely on one thread to perform batch
|
||||||
// struct AtomicFlushGroup {
|
// installation for all threads. This is different from the non-atomic flush
|
||||||
// bool commit_in_progress_;
|
// case.
|
||||||
// std::vector<MemTableList*> imm_lists;
|
// atomic_flush_install_cv_ makes sure that threads install atomic flush
|
||||||
// };
|
// results sequentially. Flush results of memtables with lower IDs get
|
||||||
bool atomic_flush_commit_in_progress_;
|
// installed to MANIFEST first.
|
||||||
|
InstrumentedCondVar atomic_flush_install_cv_;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern Options SanitizeOptions(const std::string& db,
|
extern Options SanitizeOptions(const std::string& db,
|
||||||
|
@ -219,20 +219,24 @@ Status DBImpl::FlushMemTablesToOutputFiles(
|
|||||||
return AtomicFlushMemTablesToOutputFiles(bg_flush_args, made_progress,
|
return AtomicFlushMemTablesToOutputFiles(bg_flush_args, made_progress,
|
||||||
job_context, log_buffer);
|
job_context, log_buffer);
|
||||||
}
|
}
|
||||||
Status s;
|
Status status;
|
||||||
for (auto& arg : bg_flush_args) {
|
for (auto& arg : bg_flush_args) {
|
||||||
ColumnFamilyData* cfd = arg.cfd_;
|
ColumnFamilyData* cfd = arg.cfd_;
|
||||||
const MutableCFOptions& mutable_cf_options =
|
MutableCFOptions mutable_cf_options = *cfd->GetLatestMutableCFOptions();
|
||||||
*cfd->GetLatestMutableCFOptions();
|
|
||||||
SuperVersionContext* superversion_context = arg.superversion_context_;
|
SuperVersionContext* superversion_context = arg.superversion_context_;
|
||||||
s = FlushMemTableToOutputFile(cfd, mutable_cf_options, made_progress,
|
Status s = FlushMemTableToOutputFile(cfd, mutable_cf_options, made_progress,
|
||||||
job_context, superversion_context,
|
job_context, superversion_context,
|
||||||
log_buffer);
|
log_buffer);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
|
status = s;
|
||||||
|
if (!s.IsShutdownInProgress()) {
|
||||||
|
// At this point, DB is not shutting down, nor is cfd dropped.
|
||||||
|
// Something is wrong, thus we break out of the loop.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return s;
|
}
|
||||||
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -271,7 +275,9 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
}
|
}
|
||||||
autovector<Directory*> distinct_output_dirs;
|
autovector<Directory*> distinct_output_dirs;
|
||||||
std::vector<FlushJob> jobs;
|
std::vector<FlushJob> jobs;
|
||||||
|
std::vector<MutableCFOptions> all_mutable_cf_options;
|
||||||
int num_cfs = static_cast<int>(cfds.size());
|
int num_cfs = static_cast<int>(cfds.size());
|
||||||
|
all_mutable_cf_options.reserve(num_cfs);
|
||||||
for (int i = 0; i < num_cfs; ++i) {
|
for (int i = 0; i < num_cfs; ++i) {
|
||||||
auto cfd = cfds[i];
|
auto cfd = cfds[i];
|
||||||
Directory* data_dir = GetDataDir(cfd, 0U);
|
Directory* data_dir = GetDataDir(cfd, 0U);
|
||||||
@ -290,8 +296,8 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
distinct_output_dirs.emplace_back(data_dir);
|
distinct_output_dirs.emplace_back(data_dir);
|
||||||
}
|
}
|
||||||
|
|
||||||
const MutableCFOptions& mutable_cf_options =
|
all_mutable_cf_options.emplace_back(*cfd->GetLatestMutableCFOptions());
|
||||||
*cfd->GetLatestMutableCFOptions();
|
const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.back();
|
||||||
const uint64_t* max_memtable_id = &(bg_flush_args[i].max_memtable_id_);
|
const uint64_t* max_memtable_id = &(bg_flush_args[i].max_memtable_id_);
|
||||||
jobs.emplace_back(
|
jobs.emplace_back(
|
||||||
dbname_, cfds[i], immutable_db_options_, mutable_cf_options,
|
dbname_, cfds[i], immutable_db_options_, mutable_cf_options,
|
||||||
@ -304,21 +310,18 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
jobs.back().PickMemTable();
|
jobs.back().PickMemTable();
|
||||||
}
|
}
|
||||||
|
|
||||||
autovector<FileMetaData> file_meta;
|
std::vector<FileMetaData> file_meta(num_cfs);
|
||||||
Status s;
|
Status s;
|
||||||
assert(num_cfs == static_cast<int>(jobs.size()));
|
assert(num_cfs == static_cast<int>(jobs.size()));
|
||||||
|
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
|
||||||
file_meta.emplace_back();
|
|
||||||
|
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
const MutableCFOptions& mutable_cf_options =
|
for (int i = 0; i != num_cfs; ++i) {
|
||||||
*cfds[i]->GetLatestMutableCFOptions();
|
const MutableCFOptions& mutable_cf_options = all_mutable_cf_options.at(i);
|
||||||
// may temporarily unlock and lock the mutex.
|
// may temporarily unlock and lock the mutex.
|
||||||
NotifyOnFlushBegin(cfds[i], &file_meta[i], mutable_cf_options,
|
NotifyOnFlushBegin(cfds[i], &file_meta[i], mutable_cf_options,
|
||||||
job_context->job_id, jobs[i].GetTableProperties());
|
job_context->job_id, jobs[i].GetTableProperties());
|
||||||
#endif /* !ROCKSDB_LITE */
|
|
||||||
}
|
}
|
||||||
|
#endif /* !ROCKSDB_LITE */
|
||||||
|
|
||||||
if (logfile_number_ > 0) {
|
if (logfile_number_ > 0) {
|
||||||
// TODO (yanqin) investigate whether we should sync the closed logs for
|
// TODO (yanqin) investigate whether we should sync the closed logs for
|
||||||
@ -331,8 +334,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
autovector<std::pair<bool, Status>> exec_status;
|
autovector<std::pair<bool, Status>> exec_status;
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (int i = 0; i != num_cfs; ++i) {
|
||||||
// Initially all jobs are not executed, with status OK.
|
// Initially all jobs are not executed, with status OK.
|
||||||
std::pair<bool, Status> elem(false, Status::OK());
|
exec_status.emplace_back(false, Status::OK());
|
||||||
exec_status.emplace_back(elem);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
@ -341,10 +343,6 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
exec_status[i].second =
|
exec_status[i].second =
|
||||||
jobs[i].Run(&logs_with_prep_tracker_, &file_meta[i]);
|
jobs[i].Run(&logs_with_prep_tracker_, &file_meta[i]);
|
||||||
exec_status[i].first = true;
|
exec_status[i].first = true;
|
||||||
if (!exec_status[i].second.ok()) {
|
|
||||||
s = exec_status[i].second;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (num_cfs > 1) {
|
if (num_cfs > 1) {
|
||||||
TEST_SYNC_POINT(
|
TEST_SYNC_POINT(
|
||||||
@ -352,17 +350,27 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
TEST_SYNC_POINT(
|
TEST_SYNC_POINT(
|
||||||
"DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:2");
|
"DBImpl::AtomicFlushMemTablesToOutputFiles:SomeFlushJobsComplete:2");
|
||||||
}
|
}
|
||||||
if (s.ok()) {
|
|
||||||
exec_status[0].second =
|
exec_status[0].second =
|
||||||
jobs[0].Run(&logs_with_prep_tracker_, &file_meta[0]);
|
jobs[0].Run(&logs_with_prep_tracker_, &file_meta[0]);
|
||||||
exec_status[0].first = true;
|
exec_status[0].first = true;
|
||||||
if (!exec_status[0].second.ok()) {
|
|
||||||
s = exec_status[0].second;
|
Status error_status;
|
||||||
|
for (const auto& e : exec_status) {
|
||||||
|
if (!e.second.ok()) {
|
||||||
|
s = e.second;
|
||||||
|
if (!e.second.IsShutdownInProgress()) {
|
||||||
|
// If a flush job did not return OK, and the CF is not dropped, and
|
||||||
|
// the DB is not shutting down, then we have to return this result to
|
||||||
|
// caller later.
|
||||||
|
error_status = e.second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s.ok()) {
|
s = error_status.ok() ? s : error_status;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s.ok() || s.IsShutdownInProgress()) {
|
||||||
// Sync on all distinct output directories.
|
// Sync on all distinct output directories.
|
||||||
for (auto dir : distinct_output_dirs) {
|
for (auto dir : distinct_output_dirs) {
|
||||||
if (dir != nullptr) {
|
if (dir != nullptr) {
|
||||||
@ -372,37 +380,78 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
|
auto wait_to_install_func = [&]() {
|
||||||
|
bool ready = true;
|
||||||
|
for (size_t i = 0; i != cfds.size(); ++i) {
|
||||||
|
const auto& mems = jobs[i].GetMemTables();
|
||||||
|
if (cfds[i]->IsDropped()) {
|
||||||
|
// If the column family is dropped, then do not wait.
|
||||||
|
continue;
|
||||||
|
} else if (!mems.empty() &&
|
||||||
|
cfds[i]->imm()->GetEarliestMemTableID() < mems[0]->GetID()) {
|
||||||
|
// If a flush job needs to install the flush result for mems and
|
||||||
|
// mems[0] is not the earliest memtable, it means another thread must
|
||||||
|
// be installing flush results for the same column family, then the
|
||||||
|
// current thread needs to wait.
|
||||||
|
ready = false;
|
||||||
|
break;
|
||||||
|
} else if (mems.empty() && cfds[i]->imm()->GetEarliestMemTableID() <=
|
||||||
|
bg_flush_args[i].max_memtable_id_) {
|
||||||
|
// If a flush job does not need to install flush results, then it has
|
||||||
|
// to wait until all memtables up to max_memtable_id_ (inclusive) are
|
||||||
|
// installed.
|
||||||
|
ready = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ready;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool resuming_from_bg_err = error_handler_.IsDBStopped();
|
||||||
|
while ((!error_handler_.IsDBStopped() ||
|
||||||
|
error_handler_.GetRecoveryError().ok()) &&
|
||||||
|
!wait_to_install_func()) {
|
||||||
|
atomic_flush_install_cv_.Wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
s = resuming_from_bg_err ? error_handler_.GetRecoveryError()
|
||||||
|
: error_handler_.GetBGError();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s.ok()) {
|
||||||
|
autovector<ColumnFamilyData*> tmp_cfds;
|
||||||
autovector<const autovector<MemTable*>*> mems_list;
|
autovector<const autovector<MemTable*>*> mems_list;
|
||||||
|
autovector<const MutableCFOptions*> mutable_cf_options_list;
|
||||||
|
autovector<FileMetaData*> tmp_file_meta;
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (int i = 0; i != num_cfs; ++i) {
|
||||||
const auto& mems = jobs[i].GetMemTables();
|
const auto& mems = jobs[i].GetMemTables();
|
||||||
|
if (!cfds[i]->IsDropped() && !mems.empty()) {
|
||||||
|
tmp_cfds.emplace_back(cfds[i]);
|
||||||
mems_list.emplace_back(&mems);
|
mems_list.emplace_back(&mems);
|
||||||
}
|
mutable_cf_options_list.emplace_back(&all_mutable_cf_options[i]);
|
||||||
autovector<ColumnFamilyData*> all_cfds;
|
tmp_file_meta.emplace_back(&file_meta[i]);
|
||||||
autovector<MemTableList*> imm_lists;
|
|
||||||
autovector<const MutableCFOptions*> mutable_cf_options_list;
|
|
||||||
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
|
||||||
all_cfds.emplace_back(cfd);
|
|
||||||
imm_lists.emplace_back(cfd->imm());
|
|
||||||
mutable_cf_options_list.emplace_back(cfd->GetLatestMutableCFOptions());
|
|
||||||
}
|
|
||||||
|
|
||||||
s = MemTableList::TryInstallMemtableFlushResults(
|
|
||||||
imm_lists, all_cfds, mutable_cf_options_list, mems_list,
|
|
||||||
&atomic_flush_commit_in_progress_, &logs_with_prep_tracker_,
|
|
||||||
versions_.get(), &mutex_, file_meta, &job_context->memtables_to_free,
|
|
||||||
directories_.GetDbDir(), log_buffer);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (s.ok()) {
|
s = InstallMemtableAtomicFlushResults(
|
||||||
|
nullptr /* imm_lists */, tmp_cfds, mutable_cf_options_list, mems_list,
|
||||||
|
versions_.get(), &mutex_, tmp_file_meta,
|
||||||
|
&job_context->memtables_to_free, directories_.GetDbDir(), log_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s.ok() || s.IsShutdownInProgress()) {
|
||||||
assert(num_cfs ==
|
assert(num_cfs ==
|
||||||
static_cast<int>(job_context->superversion_contexts.size()));
|
static_cast<int>(job_context->superversion_contexts.size()));
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (int i = 0; i != num_cfs; ++i) {
|
||||||
|
if (cfds[i]->IsDropped()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
InstallSuperVersionAndScheduleWork(cfds[i],
|
InstallSuperVersionAndScheduleWork(cfds[i],
|
||||||
&job_context->superversion_contexts[i],
|
&job_context->superversion_contexts[i],
|
||||||
*cfds[i]->GetLatestMutableCFOptions());
|
all_mutable_cf_options[i]);
|
||||||
VersionStorageInfo::LevelSummaryStorage tmp;
|
VersionStorageInfo::LevelSummaryStorage tmp;
|
||||||
ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
|
ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
|
||||||
cfds[i]->GetName().c_str(),
|
cfds[i]->GetName().c_str(),
|
||||||
@ -415,8 +464,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
auto sfm = static_cast<SstFileManagerImpl*>(
|
auto sfm = static_cast<SstFileManagerImpl*>(
|
||||||
immutable_db_options_.sst_file_manager.get());
|
immutable_db_options_.sst_file_manager.get());
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (int i = 0; i != num_cfs; ++i) {
|
||||||
NotifyOnFlushCompleted(cfds[i], &file_meta[i],
|
if (cfds[i]->IsDropped()) {
|
||||||
*cfds[i]->GetLatestMutableCFOptions(),
|
continue;
|
||||||
|
}
|
||||||
|
NotifyOnFlushCompleted(cfds[i], &file_meta[i], all_mutable_cf_options[i],
|
||||||
job_context->job_id, jobs[i].GetTableProperties());
|
job_context->job_id, jobs[i].GetTableProperties());
|
||||||
if (sfm) {
|
if (sfm) {
|
||||||
std::string file_path = MakeTableFileName(
|
std::string file_path = MakeTableFileName(
|
||||||
@ -434,7 +485,9 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
#endif // ROCKSDB_LITE
|
#endif // ROCKSDB_LITE
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!s.ok()) {
|
// Need to undo atomic flush if something went wrong, i.e. s is not OK and
|
||||||
|
// it is not because of CF drop.
|
||||||
|
if (!s.ok() && !s.IsShutdownInProgress()) {
|
||||||
// Have to cancel the flush jobs that have NOT executed because we need to
|
// Have to cancel the flush jobs that have NOT executed because we need to
|
||||||
// unref the versions.
|
// unref the versions.
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (int i = 0; i != num_cfs; ++i) {
|
||||||
@ -442,7 +495,6 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
jobs[i].Cancel();
|
jobs[i].Cancel();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!s.IsShutdownInProgress()) {
|
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (int i = 0; i != num_cfs; ++i) {
|
||||||
if (exec_status[i].first && exec_status[i].second.ok()) {
|
if (exec_status[i].first && exec_status[i].second.ok()) {
|
||||||
auto& mems = jobs[i].GetMemTables();
|
auto& mems = jobs[i].GetMemTables();
|
||||||
@ -453,7 +505,6 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|||||||
Status new_bg_error = s;
|
Status new_bg_error = s;
|
||||||
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
|
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
@ -1407,6 +1458,7 @@ Status DBImpl::RunManualCompaction(ColumnFamilyData* cfd, int input_level,
|
|||||||
void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
|
void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
|
||||||
FlushRequest* req) {
|
FlushRequest* req) {
|
||||||
assert(req != nullptr);
|
assert(req != nullptr);
|
||||||
|
req->reserve(cfds.size());
|
||||||
for (const auto cfd : cfds) {
|
for (const auto cfd : cfds) {
|
||||||
if (nullptr == cfd) {
|
if (nullptr == cfd) {
|
||||||
// cfd may be null, see DBImpl::ScheduleFlushes
|
// cfd may be null, see DBImpl::ScheduleFlushes
|
||||||
@ -1440,12 +1492,17 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|||||||
write_thread_.EnterUnbatched(&w, &mutex_);
|
write_thread_.EnterUnbatched(&w, &mutex_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()) {
|
||||||
|
s = SwitchMemtable(cfd, &context);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s.ok()) {
|
||||||
if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() ||
|
if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() ||
|
||||||
!cached_recoverable_state_empty_.load()) {
|
!cached_recoverable_state_empty_.load()) {
|
||||||
s = SwitchMemtable(cfd, &context);
|
|
||||||
flush_memtable_id = cfd->imm()->GetLatestMemTableID();
|
flush_memtable_id = cfd->imm()->GetLatestMemTableID();
|
||||||
flush_req.emplace_back(cfd, flush_memtable_id);
|
flush_req.emplace_back(cfd, flush_memtable_id);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (s.ok() && !flush_req.empty()) {
|
if (s.ok() && !flush_req.empty()) {
|
||||||
for (auto& elem : flush_req) {
|
for (auto& elem : flush_req) {
|
||||||
@ -1518,6 +1575,9 @@ Status DBImpl::AtomicFlushMemTables(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (auto cfd : cfds) {
|
for (auto cfd : cfds) {
|
||||||
|
if (cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
cfd->Ref();
|
cfd->Ref();
|
||||||
s = SwitchMemtable(cfd, &context);
|
s = SwitchMemtable(cfd, &context);
|
||||||
cfd->Unref();
|
cfd->Unref();
|
||||||
@ -1539,6 +1599,7 @@ Status DBImpl::AtomicFlushMemTables(
|
|||||||
write_thread_.ExitUnbatched(&w);
|
write_thread_.ExitUnbatched(&w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:AfterScheduleFlush");
|
||||||
|
|
||||||
if (s.ok() && flush_options.wait) {
|
if (s.ok() && flush_options.wait) {
|
||||||
autovector<const uint64_t*> flush_memtable_ids;
|
autovector<const uint64_t*> flush_memtable_ids;
|
||||||
@ -2046,6 +2107,7 @@ void DBImpl::BackgroundCallFlush() {
|
|||||||
bg_flush_scheduled_--;
|
bg_flush_scheduled_--;
|
||||||
// See if there's more work to be done
|
// See if there's more work to be done
|
||||||
MaybeScheduleFlushOrCompaction();
|
MaybeScheduleFlushOrCompaction();
|
||||||
|
atomic_flush_install_cv_.SignalAll();
|
||||||
bg_cv_.SignalAll();
|
bg_cv_.SignalAll();
|
||||||
// IMPORTANT: there should be no code after calling SignalAll. This call may
|
// IMPORTANT: there should be no code after calling SignalAll. This call may
|
||||||
// signal the DB destructor that it's OK to proceed with destruction. In
|
// signal the DB destructor that it's OK to proceed with destruction. In
|
||||||
|
@ -23,8 +23,7 @@
|
|||||||
#include "util/sync_point.h"
|
#include "util/sync_point.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
Options SanitizeOptions(const std::string& dbname,
|
Options SanitizeOptions(const std::string& dbname, const Options& src) {
|
||||||
const Options& src) {
|
|
||||||
auto db_options = SanitizeOptions(dbname, DBOptions(src));
|
auto db_options = SanitizeOptions(dbname, DBOptions(src));
|
||||||
ImmutableDBOptions immutable_db_options(db_options);
|
ImmutableDBOptions immutable_db_options(db_options);
|
||||||
auto cf_options =
|
auto cf_options =
|
||||||
@ -56,10 +55,9 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
|
|||||||
result.write_buffer_manager.reset(
|
result.write_buffer_manager.reset(
|
||||||
new WriteBufferManager(result.db_write_buffer_size));
|
new WriteBufferManager(result.db_write_buffer_size));
|
||||||
}
|
}
|
||||||
auto bg_job_limits = DBImpl::GetBGJobLimits(result.max_background_flushes,
|
auto bg_job_limits = DBImpl::GetBGJobLimits(
|
||||||
result.max_background_compactions,
|
result.max_background_flushes, result.max_background_compactions,
|
||||||
result.max_background_jobs,
|
result.max_background_jobs, true /* parallelize_compactions */);
|
||||||
true /* parallelize_compactions */);
|
|
||||||
result.env->IncBackgroundThreadsIfNeeded(bg_job_limits.max_compactions,
|
result.env->IncBackgroundThreadsIfNeeded(bg_job_limits.max_compactions,
|
||||||
Env::Priority::LOW);
|
Env::Priority::LOW);
|
||||||
result.env->IncBackgroundThreadsIfNeeded(bg_job_limits.max_flushes,
|
result.env->IncBackgroundThreadsIfNeeded(bg_job_limits.max_flushes,
|
||||||
@ -107,14 +105,12 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) {
|
|||||||
result.db_paths.emplace_back(dbname, std::numeric_limits<uint64_t>::max());
|
result.db_paths.emplace_back(dbname, std::numeric_limits<uint64_t>::max());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.use_direct_reads &&
|
if (result.use_direct_reads && result.compaction_readahead_size == 0) {
|
||||||
result.compaction_readahead_size == 0) {
|
|
||||||
TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr);
|
TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr);
|
||||||
result.compaction_readahead_size = 1024 * 1024 * 2;
|
result.compaction_readahead_size = 1024 * 1024 * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.compaction_readahead_size > 0 ||
|
if (result.compaction_readahead_size > 0 || result.use_direct_reads) {
|
||||||
result.use_direct_reads) {
|
|
||||||
result.new_table_reader_for_compaction_inputs = true;
|
result.new_table_reader_for_compaction_inputs = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -258,8 +254,7 @@ Status DBImpl::NewDB() {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::CreateAndNewDirectory(
|
Status DBImpl::CreateAndNewDirectory(Env* env, const std::string& dirname,
|
||||||
Env* env, const std::string& dirname,
|
|
||||||
std::unique_ptr<Directory>* directory) {
|
std::unique_ptr<Directory>* directory) {
|
||||||
// We call CreateDirIfMissing() as the directory may already exist (if we
|
// We call CreateDirIfMissing() as the directory may already exist (if we
|
||||||
// are reopening a DB), when this happens we don't want creating the
|
// are reopening a DB), when this happens we don't want creating the
|
||||||
@ -341,8 +336,8 @@ Status DBImpl::Recover(
|
|||||||
}
|
}
|
||||||
} else if (s.ok()) {
|
} else if (s.ok()) {
|
||||||
if (immutable_db_options_.error_if_exists) {
|
if (immutable_db_options_.error_if_exists) {
|
||||||
return Status::InvalidArgument(
|
return Status::InvalidArgument(dbname_,
|
||||||
dbname_, "exists (error_if_exists is true)");
|
"exists (error_if_exists is true)");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Unexpected error reading file
|
// Unexpected error reading file
|
||||||
@ -479,6 +474,28 @@ Status DBImpl::Recover(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (read_only) {
|
||||||
|
// If we are opening as read-only, we need to update options_file_number_
|
||||||
|
// to reflect the most recent OPTIONS file. It does not matter for regular
|
||||||
|
// read-write db instance because options_file_number_ will later be
|
||||||
|
// updated to versions_->NewFileNumber() in RenameTempFileToOptionsFile.
|
||||||
|
std::vector<std::string> file_names;
|
||||||
|
if (s.ok()) {
|
||||||
|
s = env_->GetChildren(GetName(), &file_names);
|
||||||
|
}
|
||||||
|
if (s.ok()) {
|
||||||
|
uint64_t number = 0;
|
||||||
|
uint64_t options_file_number = 0;
|
||||||
|
FileType type;
|
||||||
|
for (const auto& fname : file_names) {
|
||||||
|
if (ParseFileName(fname, &number, &type) && type == kOptionsFile) {
|
||||||
|
options_file_number = std::max(number, options_file_number);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
versions_->options_file_number_ = options_file_number;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -527,8 +544,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
|
|||||||
std::map<std::string, uint32_t> cf_name_id_map;
|
std::map<std::string, uint32_t> cf_name_id_map;
|
||||||
std::map<uint32_t, uint64_t> cf_lognumber_map;
|
std::map<uint32_t, uint64_t> cf_lognumber_map;
|
||||||
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
||||||
cf_name_id_map.insert(
|
cf_name_id_map.insert(std::make_pair(cfd->GetName(), cfd->GetID()));
|
||||||
std::make_pair(cfd->GetName(), cfd->GetID()));
|
|
||||||
cf_lognumber_map.insert(
|
cf_lognumber_map.insert(
|
||||||
std::make_pair(cfd->GetID(), cfd->GetLogNumber()));
|
std::make_pair(cfd->GetID(), cfd->GetLogNumber()));
|
||||||
}
|
}
|
||||||
@ -880,8 +896,8 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
|
|||||||
// VersionSet::next_file_number_ always to be strictly greater than any
|
// VersionSet::next_file_number_ always to be strictly greater than any
|
||||||
// log number
|
// log number
|
||||||
versions_->MarkFileNumberUsed(max_log_number + 1);
|
versions_->MarkFileNumberUsed(max_log_number + 1);
|
||||||
status = versions_->LogAndApply(
|
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
|
||||||
cfd, *cfd->GetLatestMutableCFOptions(), edit, &mutex_);
|
edit, &mutex_);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
// Recovery failed
|
// Recovery failed
|
||||||
break;
|
break;
|
||||||
@ -994,12 +1010,17 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
|
|||||||
if (use_custom_gc_ && snapshot_checker == nullptr) {
|
if (use_custom_gc_ && snapshot_checker == nullptr) {
|
||||||
snapshot_checker = DisableGCSnapshotChecker::Instance();
|
snapshot_checker = DisableGCSnapshotChecker::Instance();
|
||||||
}
|
}
|
||||||
|
std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
|
||||||
|
range_del_iters;
|
||||||
|
auto range_del_iter =
|
||||||
|
mem->NewRangeTombstoneIterator(ro, kMaxSequenceNumber);
|
||||||
|
if (range_del_iter != nullptr) {
|
||||||
|
range_del_iters.emplace_back(range_del_iter);
|
||||||
|
}
|
||||||
s = BuildTable(
|
s = BuildTable(
|
||||||
dbname_, env_, *cfd->ioptions(), mutable_cf_options,
|
dbname_, env_, *cfd->ioptions(), mutable_cf_options,
|
||||||
env_options_for_compaction_, cfd->table_cache(), iter.get(),
|
env_options_for_compaction_, cfd->table_cache(), iter.get(),
|
||||||
std::unique_ptr<InternalIterator>(
|
std::move(range_del_iters), &meta, cfd->internal_comparator(),
|
||||||
mem->NewRangeTombstoneIterator(ro, versions_->LastSequence())),
|
|
||||||
&meta, cfd->internal_comparator(),
|
|
||||||
cfd->int_tbl_prop_collector_factories(), cfd->GetID(), cfd->GetName(),
|
cfd->int_tbl_prop_collector_factories(), cfd->GetID(), cfd->GetName(),
|
||||||
snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
|
snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
|
||||||
GetCompressionFlush(*cfd->ioptions(), mutable_cf_options),
|
GetCompressionFlush(*cfd->ioptions(), mutable_cf_options),
|
||||||
@ -1033,8 +1054,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
|
|||||||
stats.bytes_written = meta.fd.GetFileSize();
|
stats.bytes_written = meta.fd.GetFileSize();
|
||||||
stats.num_output_files = 1;
|
stats.num_output_files = 1;
|
||||||
cfd->internal_stats()->AddCompactionStats(level, stats);
|
cfd->internal_stats()->AddCompactionStats(level, stats);
|
||||||
cfd->internal_stats()->AddCFStats(
|
cfd->internal_stats()->AddCFStats(InternalStats::BYTES_FLUSHED,
|
||||||
InternalStats::BYTES_FLUSHED, meta.fd.GetFileSize());
|
meta.fd.GetFileSize());
|
||||||
RecordTick(stats_, COMPACT_WRITE_BYTES, meta.fd.GetFileSize());
|
RecordTick(stats_, COMPACT_WRITE_BYTES, meta.fd.GetFileSize());
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
@ -1227,7 +1248,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
|
|||||||
!cfd->mem()->IsMergeOperatorSupported()) {
|
!cfd->mem()->IsMergeOperatorSupported()) {
|
||||||
s = Status::InvalidArgument(
|
s = Status::InvalidArgument(
|
||||||
"The memtable of column family %s does not support merge operator "
|
"The memtable of column family %s does not support merge operator "
|
||||||
"its options.merge_operator is non-null", cfd->GetName().c_str());
|
"its options.merge_operator is non-null",
|
||||||
|
cfd->GetName().c_str());
|
||||||
}
|
}
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
break;
|
break;
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
#include "db/db_impl.h"
|
#include "db/db_impl.h"
|
||||||
#include "db/db_iter.h"
|
#include "db/db_iter.h"
|
||||||
#include "db/merge_context.h"
|
#include "db/merge_context.h"
|
||||||
#include "db/range_del_aggregator.h"
|
|
||||||
#include "monitoring/perf_context_imp.h"
|
#include "monitoring/perf_context_imp.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
@ -89,10 +89,11 @@ class DBImplReadOnly : public DBImpl {
|
|||||||
virtual Status EnableFileDeletions(bool /*force*/) override {
|
virtual Status EnableFileDeletions(bool /*force*/) override {
|
||||||
return Status::NotSupported("Not supported operation in read only mode.");
|
return Status::NotSupported("Not supported operation in read only mode.");
|
||||||
}
|
}
|
||||||
virtual Status GetLiveFiles(std::vector<std::string>&,
|
virtual Status GetLiveFiles(std::vector<std::string>& ret,
|
||||||
uint64_t* /*manifest_file_size*/,
|
uint64_t* manifest_file_size,
|
||||||
bool /*flush_memtable*/ = true) override {
|
bool /*flush_memtable*/) override {
|
||||||
return Status::NotSupported("Not supported operation in read only mode.");
|
return DBImpl::GetLiveFiles(ret, manifest_file_size,
|
||||||
|
false /* flush_memtable */);
|
||||||
}
|
}
|
||||||
|
|
||||||
using DBImpl::Flush;
|
using DBImpl::Flush;
|
||||||
|
@ -265,18 +265,19 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
|
|||||||
// We're optimistic, updating the stats before we successfully
|
// We're optimistic, updating the stats before we successfully
|
||||||
// commit. That lets us release our leader status early.
|
// commit. That lets us release our leader status early.
|
||||||
auto stats = default_cf_internal_stats_;
|
auto stats = default_cf_internal_stats_;
|
||||||
stats->AddDBStats(InternalStats::NUMBER_KEYS_WRITTEN, total_count,
|
stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count,
|
||||||
concurrent_update);
|
concurrent_update);
|
||||||
RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
|
RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
|
||||||
stats->AddDBStats(InternalStats::BYTES_WRITTEN, total_byte_size,
|
stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size,
|
||||||
concurrent_update);
|
concurrent_update);
|
||||||
RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
|
RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
|
||||||
stats->AddDBStats(InternalStats::WRITE_DONE_BY_SELF, 1, concurrent_update);
|
stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1,
|
||||||
|
concurrent_update);
|
||||||
RecordTick(stats_, WRITE_DONE_BY_SELF);
|
RecordTick(stats_, WRITE_DONE_BY_SELF);
|
||||||
auto write_done_by_other = write_group.size - 1;
|
auto write_done_by_other = write_group.size - 1;
|
||||||
if (write_done_by_other > 0) {
|
if (write_done_by_other > 0) {
|
||||||
stats->AddDBStats(InternalStats::WRITE_DONE_BY_OTHER, write_done_by_other,
|
stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
|
||||||
concurrent_update);
|
write_done_by_other, concurrent_update);
|
||||||
RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
|
RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
|
||||||
}
|
}
|
||||||
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
|
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
|
||||||
@ -467,9 +468,9 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto stats = default_cf_internal_stats_;
|
auto stats = default_cf_internal_stats_;
|
||||||
stats->AddDBStats(InternalStats::NUMBER_KEYS_WRITTEN, total_count);
|
stats->AddDBStats(InternalStats::kIntStatsNumKeysWritten, total_count);
|
||||||
RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
|
RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
|
||||||
stats->AddDBStats(InternalStats::BYTES_WRITTEN, total_byte_size);
|
stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size);
|
||||||
RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
|
RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
|
||||||
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
|
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
|
||||||
|
|
||||||
@ -477,10 +478,10 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
|
|||||||
|
|
||||||
if (w.ShouldWriteToWAL()) {
|
if (w.ShouldWriteToWAL()) {
|
||||||
PERF_TIMER_GUARD(write_wal_time);
|
PERF_TIMER_GUARD(write_wal_time);
|
||||||
stats->AddDBStats(InternalStats::WRITE_DONE_BY_SELF, 1);
|
stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1);
|
||||||
RecordTick(stats_, WRITE_DONE_BY_SELF, 1);
|
RecordTick(stats_, WRITE_DONE_BY_SELF, 1);
|
||||||
if (wal_write_group.size > 1) {
|
if (wal_write_group.size > 1) {
|
||||||
stats->AddDBStats(InternalStats::WRITE_DONE_BY_OTHER,
|
stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
|
||||||
wal_write_group.size - 1);
|
wal_write_group.size - 1);
|
||||||
RecordTick(stats_, WRITE_DONE_BY_OTHER, wal_write_group.size - 1);
|
RecordTick(stats_, WRITE_DONE_BY_OTHER, wal_write_group.size - 1);
|
||||||
}
|
}
|
||||||
@ -591,15 +592,16 @@ Status DBImpl::WriteImplWALOnly(const WriteOptions& write_options,
|
|||||||
// We're optimistic, updating the stats before we successfully
|
// We're optimistic, updating the stats before we successfully
|
||||||
// commit. That lets us release our leader status early.
|
// commit. That lets us release our leader status early.
|
||||||
auto stats = default_cf_internal_stats_;
|
auto stats = default_cf_internal_stats_;
|
||||||
stats->AddDBStats(InternalStats::BYTES_WRITTEN, total_byte_size,
|
stats->AddDBStats(InternalStats::kIntStatsBytesWritten, total_byte_size,
|
||||||
concurrent_update);
|
concurrent_update);
|
||||||
RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
|
RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
|
||||||
stats->AddDBStats(InternalStats::WRITE_DONE_BY_SELF, 1, concurrent_update);
|
stats->AddDBStats(InternalStats::kIntStatsWriteDoneBySelf, 1,
|
||||||
|
concurrent_update);
|
||||||
RecordTick(stats_, WRITE_DONE_BY_SELF);
|
RecordTick(stats_, WRITE_DONE_BY_SELF);
|
||||||
auto write_done_by_other = write_group.size - 1;
|
auto write_done_by_other = write_group.size - 1;
|
||||||
if (write_done_by_other > 0) {
|
if (write_done_by_other > 0) {
|
||||||
stats->AddDBStats(InternalStats::WRITE_DONE_BY_OTHER, write_done_by_other,
|
stats->AddDBStats(InternalStats::kIntStatsWriteDoneByOther,
|
||||||
concurrent_update);
|
write_done_by_other, concurrent_update);
|
||||||
RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
|
RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
|
||||||
}
|
}
|
||||||
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
|
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
|
||||||
@ -908,12 +910,12 @@ Status DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
|
|||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
auto stats = default_cf_internal_stats_;
|
auto stats = default_cf_internal_stats_;
|
||||||
if (need_log_sync) {
|
if (need_log_sync) {
|
||||||
stats->AddDBStats(InternalStats::WAL_FILE_SYNCED, 1);
|
stats->AddDBStats(InternalStats::kIntStatsWalFileSynced, 1);
|
||||||
RecordTick(stats_, WAL_FILE_SYNCED);
|
RecordTick(stats_, WAL_FILE_SYNCED);
|
||||||
}
|
}
|
||||||
stats->AddDBStats(InternalStats::WAL_FILE_BYTES, log_size);
|
stats->AddDBStats(InternalStats::kIntStatsWalFileBytes, log_size);
|
||||||
RecordTick(stats_, WAL_FILE_BYTES, log_size);
|
RecordTick(stats_, WAL_FILE_BYTES, log_size);
|
||||||
stats->AddDBStats(InternalStats::WRITE_WITH_WAL, write_with_wal);
|
stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal);
|
||||||
RecordTick(stats_, WRITE_WITH_WAL, write_with_wal);
|
RecordTick(stats_, WRITE_WITH_WAL, write_with_wal);
|
||||||
}
|
}
|
||||||
return status;
|
return status;
|
||||||
@ -959,9 +961,10 @@ Status DBImpl::ConcurrentWriteToWAL(const WriteThread::WriteGroup& write_group,
|
|||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
const bool concurrent = true;
|
const bool concurrent = true;
|
||||||
auto stats = default_cf_internal_stats_;
|
auto stats = default_cf_internal_stats_;
|
||||||
stats->AddDBStats(InternalStats::WAL_FILE_BYTES, log_size, concurrent);
|
stats->AddDBStats(InternalStats::kIntStatsWalFileBytes, log_size,
|
||||||
|
concurrent);
|
||||||
RecordTick(stats_, WAL_FILE_BYTES, log_size);
|
RecordTick(stats_, WAL_FILE_BYTES, log_size);
|
||||||
stats->AddDBStats(InternalStats::WRITE_WITH_WAL, write_with_wal,
|
stats->AddDBStats(InternalStats::kIntStatsWriteWithWal, write_with_wal,
|
||||||
concurrent);
|
concurrent);
|
||||||
RecordTick(stats_, WRITE_WITH_WAL, write_with_wal);
|
RecordTick(stats_, WRITE_WITH_WAL, write_with_wal);
|
||||||
}
|
}
|
||||||
@ -1255,8 +1258,8 @@ Status DBImpl::DelayWrite(uint64_t num_bytes,
|
|||||||
}
|
}
|
||||||
assert(!delayed || !write_options.no_slowdown);
|
assert(!delayed || !write_options.no_slowdown);
|
||||||
if (delayed) {
|
if (delayed) {
|
||||||
default_cf_internal_stats_->AddDBStats(InternalStats::WRITE_STALL_MICROS,
|
default_cf_internal_stats_->AddDBStats(
|
||||||
time_delayed);
|
InternalStats::kIntStatsWriteStallMicros, time_delayed);
|
||||||
RecordTick(stats_, STALL_MICROS, time_delayed);
|
RecordTick(stats_, STALL_MICROS, time_delayed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,7 +171,7 @@ class DBIter final: public Iterator {
|
|||||||
iter_ = iter;
|
iter_ = iter;
|
||||||
iter_->SetPinnedItersMgr(&pinned_iters_mgr_);
|
iter_->SetPinnedItersMgr(&pinned_iters_mgr_);
|
||||||
}
|
}
|
||||||
virtual RangeDelAggregatorV2* GetRangeDelAggregator() {
|
virtual ReadRangeDelAggregator* GetRangeDelAggregator() {
|
||||||
return &range_del_agg_;
|
return &range_del_agg_;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -341,7 +341,7 @@ class DBIter final: public Iterator {
|
|||||||
const bool total_order_seek_;
|
const bool total_order_seek_;
|
||||||
// List of operands for merge operator.
|
// List of operands for merge operator.
|
||||||
MergeContext merge_context_;
|
MergeContext merge_context_;
|
||||||
RangeDelAggregatorV2 range_del_agg_;
|
ReadRangeDelAggregator range_del_agg_;
|
||||||
LocalStatistics local_stats_;
|
LocalStatistics local_stats_;
|
||||||
PinnedIteratorsManager pinned_iters_mgr_;
|
PinnedIteratorsManager pinned_iters_mgr_;
|
||||||
ReadCallback* read_callback_;
|
ReadCallback* read_callback_;
|
||||||
@ -1479,7 +1479,7 @@ Iterator* NewDBIterator(Env* env, const ReadOptions& read_options,
|
|||||||
|
|
||||||
ArenaWrappedDBIter::~ArenaWrappedDBIter() { db_iter_->~DBIter(); }
|
ArenaWrappedDBIter::~ArenaWrappedDBIter() { db_iter_->~DBIter(); }
|
||||||
|
|
||||||
RangeDelAggregatorV2* ArenaWrappedDBIter::GetRangeDelAggregator() {
|
ReadRangeDelAggregator* ArenaWrappedDBIter::GetRangeDelAggregator() {
|
||||||
return db_iter_->GetRangeDelAggregator();
|
return db_iter_->GetRangeDelAggregator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include "db/db_impl.h"
|
#include "db/db_impl.h"
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "db/range_del_aggregator_v2.h"
|
#include "db/range_del_aggregator.h"
|
||||||
#include "options/cf_options.h"
|
#include "options/cf_options.h"
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
#include "rocksdb/iterator.h"
|
#include "rocksdb/iterator.h"
|
||||||
@ -48,7 +48,7 @@ class ArenaWrappedDBIter : public Iterator {
|
|||||||
// Get the arena to be used to allocate memory for DBIter to be wrapped,
|
// Get the arena to be used to allocate memory for DBIter to be wrapped,
|
||||||
// as well as child iterators in it.
|
// as well as child iterators in it.
|
||||||
virtual Arena* GetArena() { return &arena_; }
|
virtual Arena* GetArena() { return &arena_; }
|
||||||
virtual RangeDelAggregatorV2* GetRangeDelAggregator();
|
virtual ReadRangeDelAggregator* GetRangeDelAggregator();
|
||||||
|
|
||||||
// Set the internal iterator wrapped inside the DB Iterator. Usually it is
|
// Set the internal iterator wrapped inside the DB Iterator. Usually it is
|
||||||
// a merging iterator.
|
// a merging iterator.
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
#include "db/db_test_util.h"
|
#include "db/db_test_util.h"
|
||||||
#include "db/memtable.h"
|
#include "db/memtable.h"
|
||||||
|
#include "db/range_del_aggregator.h"
|
||||||
#include "port/stack_trace.h"
|
#include "port/stack_trace.h"
|
||||||
#include "rocksdb/memtablerep.h"
|
#include "rocksdb/memtablerep.h"
|
||||||
#include "rocksdb/slice_transform.h"
|
#include "rocksdb/slice_transform.h"
|
||||||
@ -135,7 +136,8 @@ TEST_F(DBMemTableTest, DuplicateSeq) {
|
|||||||
MergeContext merge_context;
|
MergeContext merge_context;
|
||||||
Options options;
|
Options options;
|
||||||
InternalKeyComparator ikey_cmp(options.comparator);
|
InternalKeyComparator ikey_cmp(options.comparator);
|
||||||
RangeDelAggregator range_del_agg(ikey_cmp, {} /* snapshots */);
|
ReadRangeDelAggregator range_del_agg(&ikey_cmp,
|
||||||
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
|
|
||||||
// Create a MemTable
|
// Create a MemTable
|
||||||
InternalKeyComparator cmp(BytewiseComparator());
|
InternalKeyComparator cmp(BytewiseComparator());
|
||||||
|
@ -1041,11 +1041,16 @@ TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) {
|
|||||||
// L2:
|
// L2:
|
||||||
// [key000000#1,1, key000000#1,1]
|
// [key000000#1,1, key000000#1,1]
|
||||||
// [key000002#6,1, key000004#72057594037927935,15]
|
// [key000002#6,1, key000004#72057594037927935,15]
|
||||||
|
//
|
||||||
|
// At the same time, verify the compaction does not cause the key at the
|
||||||
|
// endpoint (key000002#6,1) to disappear.
|
||||||
|
ASSERT_EQ(value, Get(Key(2)));
|
||||||
auto begin_str = Key(3);
|
auto begin_str = Key(3);
|
||||||
const rocksdb::Slice begin = begin_str;
|
const rocksdb::Slice begin = begin_str;
|
||||||
dbfull()->TEST_CompactRange(1, &begin, nullptr);
|
dbfull()->TEST_CompactRange(1, &begin, nullptr);
|
||||||
ASSERT_EQ(1, NumTableFilesAtLevel(1));
|
ASSERT_EQ(1, NumTableFilesAtLevel(1));
|
||||||
ASSERT_EQ(2, NumTableFilesAtLevel(2));
|
ASSERT_EQ(2, NumTableFilesAtLevel(2));
|
||||||
|
ASSERT_EQ(value, Get(Key(2)));
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -814,7 +814,7 @@ std::string DBTestBase::AllEntriesFor(const Slice& user_key, int cf) {
|
|||||||
Arena arena;
|
Arena arena;
|
||||||
auto options = CurrentOptions();
|
auto options = CurrentOptions();
|
||||||
InternalKeyComparator icmp(options.comparator);
|
InternalKeyComparator icmp(options.comparator);
|
||||||
RangeDelAggregatorV2 range_del_agg(&icmp,
|
ReadRangeDelAggregator range_del_agg(&icmp,
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
ScopedArenaIterator iter;
|
ScopedArenaIterator iter;
|
||||||
if (cf == 0) {
|
if (cf == 0) {
|
||||||
@ -1227,7 +1227,7 @@ void DBTestBase::validateNumberOfEntries(int numValues, int cf) {
|
|||||||
Arena arena;
|
Arena arena;
|
||||||
auto options = CurrentOptions();
|
auto options = CurrentOptions();
|
||||||
InternalKeyComparator icmp(options.comparator);
|
InternalKeyComparator icmp(options.comparator);
|
||||||
RangeDelAggregatorV2 range_del_agg(&icmp,
|
ReadRangeDelAggregator range_del_agg(&icmp,
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
// This should be defined after range_del_agg so that it destructs the
|
// This should be defined after range_del_agg so that it destructs the
|
||||||
// assigned iterator before it range_del_agg is already destructed.
|
// assigned iterator before it range_del_agg is already destructed.
|
||||||
@ -1437,7 +1437,7 @@ void DBTestBase::VerifyDBInternal(
|
|||||||
std::vector<std::pair<std::string, std::string>> true_data) {
|
std::vector<std::pair<std::string, std::string>> true_data) {
|
||||||
Arena arena;
|
Arena arena;
|
||||||
InternalKeyComparator icmp(last_options_.comparator);
|
InternalKeyComparator icmp(last_options_.comparator);
|
||||||
RangeDelAggregatorV2 range_del_agg(&icmp,
|
ReadRangeDelAggregator range_del_agg(&icmp,
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
auto iter =
|
auto iter =
|
||||||
dbfull()->NewInternalIterator(&arena, &range_del_agg, kMaxSequenceNumber);
|
dbfull()->NewInternalIterator(&arena, &range_del_agg, kMaxSequenceNumber);
|
||||||
|
@ -24,14 +24,15 @@
|
|||||||
#include "db/event_helpers.h"
|
#include "db/event_helpers.h"
|
||||||
#include "db/log_reader.h"
|
#include "db/log_reader.h"
|
||||||
#include "db/log_writer.h"
|
#include "db/log_writer.h"
|
||||||
|
#include "db/memtable.h"
|
||||||
#include "db/memtable_list.h"
|
#include "db/memtable_list.h"
|
||||||
#include "db/merge_context.h"
|
#include "db/merge_context.h"
|
||||||
|
#include "db/range_tombstone_fragmenter.h"
|
||||||
#include "db/version_set.h"
|
#include "db/version_set.h"
|
||||||
#include "monitoring/iostats_context_imp.h"
|
#include "monitoring/iostats_context_imp.h"
|
||||||
#include "monitoring/perf_context_imp.h"
|
#include "monitoring/perf_context_imp.h"
|
||||||
#include "monitoring/thread_status_util.h"
|
#include "monitoring/thread_status_util.h"
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
#include "db/memtable.h"
|
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
#include "rocksdb/statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
@ -295,7 +296,8 @@ Status FlushJob::WriteLevel0Table() {
|
|||||||
// memtable and its associated range deletion memtable, respectively, at
|
// memtable and its associated range deletion memtable, respectively, at
|
||||||
// corresponding indexes.
|
// corresponding indexes.
|
||||||
std::vector<InternalIterator*> memtables;
|
std::vector<InternalIterator*> memtables;
|
||||||
std::vector<InternalIterator*> range_del_iters;
|
std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
|
||||||
|
range_del_iters;
|
||||||
ReadOptions ro;
|
ReadOptions ro;
|
||||||
ro.total_order_seek = true;
|
ro.total_order_seek = true;
|
||||||
Arena arena;
|
Arena arena;
|
||||||
@ -308,9 +310,9 @@ Status FlushJob::WriteLevel0Table() {
|
|||||||
cfd_->GetName().c_str(), job_context_->job_id, m->GetNextLogNumber());
|
cfd_->GetName().c_str(), job_context_->job_id, m->GetNextLogNumber());
|
||||||
memtables.push_back(m->NewIterator(ro, &arena));
|
memtables.push_back(m->NewIterator(ro, &arena));
|
||||||
auto* range_del_iter =
|
auto* range_del_iter =
|
||||||
m->NewRangeTombstoneIterator(ro, versions_->LastSequence());
|
m->NewRangeTombstoneIterator(ro, kMaxSequenceNumber);
|
||||||
if (range_del_iter != nullptr) {
|
if (range_del_iter != nullptr) {
|
||||||
range_del_iters.push_back(range_del_iter);
|
range_del_iters.emplace_back(range_del_iter);
|
||||||
}
|
}
|
||||||
total_num_entries += m->num_entries();
|
total_num_entries += m->num_entries();
|
||||||
total_num_deletes += m->num_deletes();
|
total_num_deletes += m->num_deletes();
|
||||||
@ -329,10 +331,6 @@ Status FlushJob::WriteLevel0Table() {
|
|||||||
ScopedArenaIterator iter(
|
ScopedArenaIterator iter(
|
||||||
NewMergingIterator(&cfd_->internal_comparator(), &memtables[0],
|
NewMergingIterator(&cfd_->internal_comparator(), &memtables[0],
|
||||||
static_cast<int>(memtables.size()), &arena));
|
static_cast<int>(memtables.size()), &arena));
|
||||||
std::unique_ptr<InternalIterator> range_del_iter(NewMergingIterator(
|
|
||||||
&cfd_->internal_comparator(),
|
|
||||||
range_del_iters.empty() ? nullptr : &range_del_iters[0],
|
|
||||||
static_cast<int>(range_del_iters.size())));
|
|
||||||
ROCKS_LOG_INFO(db_options_.info_log,
|
ROCKS_LOG_INFO(db_options_.info_log,
|
||||||
"[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": started",
|
"[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": started",
|
||||||
cfd_->GetName().c_str(), job_context_->job_id,
|
cfd_->GetName().c_str(), job_context_->job_id,
|
||||||
@ -358,7 +356,7 @@ Status FlushJob::WriteLevel0Table() {
|
|||||||
s = BuildTable(
|
s = BuildTable(
|
||||||
dbname_, db_options_.env, *cfd_->ioptions(), mutable_cf_options_,
|
dbname_, db_options_.env, *cfd_->ioptions(), mutable_cf_options_,
|
||||||
env_options_, cfd_->table_cache(), iter.get(),
|
env_options_, cfd_->table_cache(), iter.get(),
|
||||||
std::move(range_del_iter), &meta_, cfd_->internal_comparator(),
|
std::move(range_del_iters), &meta_, cfd_->internal_comparator(),
|
||||||
cfd_->int_tbl_prop_collector_factories(), cfd_->GetID(),
|
cfd_->int_tbl_prop_collector_factories(), cfd_->GetID(),
|
||||||
cfd_->GetName(), existing_snapshots_,
|
cfd_->GetName(), existing_snapshots_,
|
||||||
earliest_write_conflict_snapshot_, snapshot_checker_,
|
earliest_write_conflict_snapshot_, snapshot_checker_,
|
||||||
|
@ -279,7 +279,6 @@ TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) {
|
|||||||
*cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber);
|
*cfd->GetLatestMutableCFOptions(), kMaxSequenceNumber);
|
||||||
mem->SetID(i);
|
mem->SetID(i);
|
||||||
mem->Ref();
|
mem->Ref();
|
||||||
mem->TEST_AtomicFlushSequenceNumber() = 123;
|
|
||||||
|
|
||||||
for (size_t j = 0; j != num_keys_per_memtable; ++j) {
|
for (size_t j = 0; j != num_keys_per_memtable; ++j) {
|
||||||
std::string key(ToString(j + i * num_keys_per_memtable));
|
std::string key(ToString(j + i * num_keys_per_memtable));
|
||||||
@ -309,7 +308,9 @@ TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) {
|
|||||||
k++;
|
k++;
|
||||||
}
|
}
|
||||||
HistogramData hist;
|
HistogramData hist;
|
||||||
autovector<FileMetaData> file_metas;
|
std::vector<FileMetaData> file_metas;
|
||||||
|
// Call reserve to avoid auto-resizing
|
||||||
|
file_metas.reserve(flush_jobs.size());
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
for (auto& job : flush_jobs) {
|
for (auto& job : flush_jobs) {
|
||||||
job.PickMemTable();
|
job.PickMemTable();
|
||||||
@ -320,23 +321,23 @@ TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) {
|
|||||||
ASSERT_OK(job.Run(nullptr /**/, &meta));
|
ASSERT_OK(job.Run(nullptr /**/, &meta));
|
||||||
file_metas.emplace_back(meta);
|
file_metas.emplace_back(meta);
|
||||||
}
|
}
|
||||||
|
autovector<FileMetaData*> file_meta_ptrs;
|
||||||
|
for (auto& meta : file_metas) {
|
||||||
|
file_meta_ptrs.push_back(&meta);
|
||||||
|
}
|
||||||
autovector<const autovector<MemTable*>*> mems_list;
|
autovector<const autovector<MemTable*>*> mems_list;
|
||||||
for (size_t i = 0; i != all_cfds.size(); ++i) {
|
for (size_t i = 0; i != all_cfds.size(); ++i) {
|
||||||
const auto& mems = flush_jobs[i].GetMemTables();
|
const auto& mems = flush_jobs[i].GetMemTables();
|
||||||
mems_list.push_back(&mems);
|
mems_list.push_back(&mems);
|
||||||
}
|
}
|
||||||
autovector<MemTableList*> imm_lists;
|
|
||||||
autovector<const MutableCFOptions*> mutable_cf_options_list;
|
autovector<const MutableCFOptions*> mutable_cf_options_list;
|
||||||
for (auto cfd : all_cfds) {
|
for (auto cfd : all_cfds) {
|
||||||
imm_lists.push_back(cfd->imm());
|
|
||||||
mutable_cf_options_list.push_back(cfd->GetLatestMutableCFOptions());
|
mutable_cf_options_list.push_back(cfd->GetLatestMutableCFOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool atomic_flush_commit_in_progress = false;
|
Status s = InstallMemtableAtomicFlushResults(
|
||||||
Status s = MemTableList::TryInstallMemtableFlushResults(
|
nullptr /* imm_lists */, all_cfds, mutable_cf_options_list, mems_list,
|
||||||
imm_lists, all_cfds, mutable_cf_options_list, mems_list,
|
versions_.get(), &mutex_, file_meta_ptrs, &job_context.memtables_to_free,
|
||||||
&atomic_flush_commit_in_progress, nullptr /* logs_prep_tracker */,
|
|
||||||
versions_.get(), &mutex_, file_metas, &job_context.memtables_to_free,
|
|
||||||
nullptr /* db_directory */, nullptr /* log_buffer */);
|
nullptr /* db_directory */, nullptr /* log_buffer */);
|
||||||
ASSERT_OK(s);
|
ASSERT_OK(s);
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
#include "db/db_iter.h"
|
#include "db/db_iter.h"
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "db/job_context.h"
|
#include "db/job_context.h"
|
||||||
#include "db/range_del_aggregator_v2.h"
|
#include "db/range_del_aggregator.h"
|
||||||
#include "db/range_tombstone_fragmenter.h"
|
#include "db/range_tombstone_fragmenter.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
#include "rocksdb/slice.h"
|
#include "rocksdb/slice.h"
|
||||||
@ -73,7 +73,7 @@ class ForwardLevelIterator : public InternalIterator {
|
|||||||
delete file_iter_;
|
delete file_iter_;
|
||||||
}
|
}
|
||||||
|
|
||||||
RangeDelAggregatorV2 range_del_agg(&cfd_->internal_comparator(),
|
ReadRangeDelAggregator range_del_agg(&cfd_->internal_comparator(),
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
file_iter_ = cfd_->table_cache()->NewIterator(
|
file_iter_ = cfd_->table_cache()->NewIterator(
|
||||||
read_options_, *(cfd_->soptions()), cfd_->internal_comparator(),
|
read_options_, *(cfd_->soptions()), cfd_->internal_comparator(),
|
||||||
@ -610,7 +610,7 @@ void ForwardIterator::RebuildIterators(bool refresh_sv) {
|
|||||||
// New
|
// New
|
||||||
sv_ = cfd_->GetReferencedSuperVersion(&(db_->mutex_));
|
sv_ = cfd_->GetReferencedSuperVersion(&(db_->mutex_));
|
||||||
}
|
}
|
||||||
RangeDelAggregatorV2 range_del_agg(&cfd_->internal_comparator(),
|
ReadRangeDelAggregator range_del_agg(&cfd_->internal_comparator(),
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
mutable_iter_ = sv_->mem->NewIterator(read_options_, &arena_);
|
mutable_iter_ = sv_->mem->NewIterator(read_options_, &arena_);
|
||||||
sv_->imm->AddIterators(read_options_, &imm_iters_, &arena_);
|
sv_->imm->AddIterators(read_options_, &imm_iters_, &arena_);
|
||||||
@ -669,7 +669,7 @@ void ForwardIterator::RenewIterators() {
|
|||||||
|
|
||||||
mutable_iter_ = svnew->mem->NewIterator(read_options_, &arena_);
|
mutable_iter_ = svnew->mem->NewIterator(read_options_, &arena_);
|
||||||
svnew->imm->AddIterators(read_options_, &imm_iters_, &arena_);
|
svnew->imm->AddIterators(read_options_, &imm_iters_, &arena_);
|
||||||
RangeDelAggregatorV2 range_del_agg(&cfd_->internal_comparator(),
|
ReadRangeDelAggregator range_del_agg(&cfd_->internal_comparator(),
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
if (!read_options_.ignore_range_deletions) {
|
if (!read_options_.ignore_range_deletions) {
|
||||||
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
|
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
|
||||||
|
@ -949,14 +949,17 @@ void InternalStats::DumpDBStats(std::string* value) {
|
|||||||
seconds_up, interval_seconds_up);
|
seconds_up, interval_seconds_up);
|
||||||
value->append(buf);
|
value->append(buf);
|
||||||
// Cumulative
|
// Cumulative
|
||||||
uint64_t user_bytes_written = GetDBStats(InternalStats::BYTES_WRITTEN);
|
uint64_t user_bytes_written =
|
||||||
uint64_t num_keys_written = GetDBStats(InternalStats::NUMBER_KEYS_WRITTEN);
|
GetDBStats(InternalStats::kIntStatsBytesWritten);
|
||||||
uint64_t write_other = GetDBStats(InternalStats::WRITE_DONE_BY_OTHER);
|
uint64_t num_keys_written =
|
||||||
uint64_t write_self = GetDBStats(InternalStats::WRITE_DONE_BY_SELF);
|
GetDBStats(InternalStats::kIntStatsNumKeysWritten);
|
||||||
uint64_t wal_bytes = GetDBStats(InternalStats::WAL_FILE_BYTES);
|
uint64_t write_other = GetDBStats(InternalStats::kIntStatsWriteDoneByOther);
|
||||||
uint64_t wal_synced = GetDBStats(InternalStats::WAL_FILE_SYNCED);
|
uint64_t write_self = GetDBStats(InternalStats::kIntStatsWriteDoneBySelf);
|
||||||
uint64_t write_with_wal = GetDBStats(InternalStats::WRITE_WITH_WAL);
|
uint64_t wal_bytes = GetDBStats(InternalStats::kIntStatsWalFileBytes);
|
||||||
uint64_t write_stall_micros = GetDBStats(InternalStats::WRITE_STALL_MICROS);
|
uint64_t wal_synced = GetDBStats(InternalStats::kIntStatsWalFileSynced);
|
||||||
|
uint64_t write_with_wal = GetDBStats(InternalStats::kIntStatsWriteWithWal);
|
||||||
|
uint64_t write_stall_micros =
|
||||||
|
GetDBStats(InternalStats::kIntStatsWriteStallMicros);
|
||||||
|
|
||||||
const int kHumanMicrosLen = 32;
|
const int kHumanMicrosLen = 32;
|
||||||
char human_micros[kHumanMicrosLen];
|
char human_micros[kHumanMicrosLen];
|
||||||
|
@ -108,15 +108,15 @@ class InternalStats {
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum InternalDBStatsType {
|
enum InternalDBStatsType {
|
||||||
WAL_FILE_BYTES,
|
kIntStatsWalFileBytes,
|
||||||
WAL_FILE_SYNCED,
|
kIntStatsWalFileSynced,
|
||||||
BYTES_WRITTEN,
|
kIntStatsBytesWritten,
|
||||||
NUMBER_KEYS_WRITTEN,
|
kIntStatsNumKeysWritten,
|
||||||
WRITE_DONE_BY_OTHER,
|
kIntStatsWriteDoneByOther,
|
||||||
WRITE_DONE_BY_SELF,
|
kIntStatsWriteDoneBySelf,
|
||||||
WRITE_WITH_WAL,
|
kIntStatsWriteWithWal,
|
||||||
WRITE_STALL_MICROS,
|
kIntStatsWriteStallMicros,
|
||||||
INTERNAL_DB_STATS_ENUM_MAX,
|
kIntStatsNumMax,
|
||||||
};
|
};
|
||||||
|
|
||||||
InternalStats(int num_levels, Env* env, ColumnFamilyData* cfd)
|
InternalStats(int num_levels, Env* env, ColumnFamilyData* cfd)
|
||||||
@ -292,7 +292,7 @@ class InternalStats {
|
|||||||
};
|
};
|
||||||
|
|
||||||
void Clear() {
|
void Clear() {
|
||||||
for (int i = 0; i < INTERNAL_DB_STATS_ENUM_MAX; i++) {
|
for (int i = 0; i < kIntStatsNumMax; i++) {
|
||||||
db_stats_[i].store(0);
|
db_stats_[i].store(0);
|
||||||
}
|
}
|
||||||
for (int i = 0; i < INTERNAL_CF_STATS_ENUM_MAX; i++) {
|
for (int i = 0; i < INTERNAL_CF_STATS_ENUM_MAX; i++) {
|
||||||
@ -382,7 +382,7 @@ class InternalStats {
|
|||||||
bool HandleBlockCacheStat(Cache** block_cache);
|
bool HandleBlockCacheStat(Cache** block_cache);
|
||||||
|
|
||||||
// Per-DB stats
|
// Per-DB stats
|
||||||
std::atomic<uint64_t> db_stats_[INTERNAL_DB_STATS_ENUM_MAX];
|
std::atomic<uint64_t> db_stats_[kIntStatsNumMax];
|
||||||
// Per-ColumnFamily stats
|
// Per-ColumnFamily stats
|
||||||
uint64_t cf_stats_value_[INTERNAL_CF_STATS_ENUM_MAX];
|
uint64_t cf_stats_value_[INTERNAL_CF_STATS_ENUM_MAX];
|
||||||
uint64_t cf_stats_count_[INTERNAL_CF_STATS_ENUM_MAX];
|
uint64_t cf_stats_count_[INTERNAL_CF_STATS_ENUM_MAX];
|
||||||
@ -580,15 +580,15 @@ class InternalStats {
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum InternalDBStatsType {
|
enum InternalDBStatsType {
|
||||||
WAL_FILE_BYTES,
|
kIntStatsWalFileBytes,
|
||||||
WAL_FILE_SYNCED,
|
kIntStatsWalFileSynced,
|
||||||
BYTES_WRITTEN,
|
kIntStatsBytesWritten,
|
||||||
NUMBER_KEYS_WRITTEN,
|
kIntStatsNumKeysWritten,
|
||||||
WRITE_DONE_BY_OTHER,
|
kIntStatsWriteDoneByOther,
|
||||||
WRITE_DONE_BY_SELF,
|
kIntStatsWriteDoneBySelf,
|
||||||
WRITE_WITH_WAL,
|
kIntStatsWriteWithWal,
|
||||||
WRITE_STALL_MICROS,
|
kIntStatsWriteStallMicros,
|
||||||
INTERNAL_DB_STATS_ENUM_MAX,
|
kIntStatsNumMax,
|
||||||
};
|
};
|
||||||
|
|
||||||
InternalStats(int /*num_levels*/, Env* /*env*/, ColumnFamilyData* /*cfd*/) {}
|
InternalStats(int /*num_levels*/, Env* /*env*/, ColumnFamilyData* /*cfd*/) {}
|
||||||
|
@ -905,6 +905,7 @@ class TestFileOperationListener : public EventListener {
|
|||||||
if (info.status.ok()) {
|
if (info.status.ok()) {
|
||||||
++file_reads_success_;
|
++file_reads_success_;
|
||||||
}
|
}
|
||||||
|
ReportDuration(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
void OnFileWriteFinish(const FileOperationInfo& info) override {
|
void OnFileWriteFinish(const FileOperationInfo& info) override {
|
||||||
@ -912,6 +913,7 @@ class TestFileOperationListener : public EventListener {
|
|||||||
if (info.status.ok()) {
|
if (info.status.ok()) {
|
||||||
++file_writes_success_;
|
++file_writes_success_;
|
||||||
}
|
}
|
||||||
|
ReportDuration(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ShouldBeNotifiedOnFileIO() override { return true; }
|
bool ShouldBeNotifiedOnFileIO() override { return true; }
|
||||||
@ -920,6 +922,13 @@ class TestFileOperationListener : public EventListener {
|
|||||||
std::atomic<size_t> file_reads_success_;
|
std::atomic<size_t> file_reads_success_;
|
||||||
std::atomic<size_t> file_writes_;
|
std::atomic<size_t> file_writes_;
|
||||||
std::atomic<size_t> file_writes_success_;
|
std::atomic<size_t> file_writes_success_;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void ReportDuration(const FileOperationInfo& info) const {
|
||||||
|
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||||||
|
info.finish_timestamp - info.start_timestamp);
|
||||||
|
ASSERT_GT(duration.count(), 0);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(EventListenerTest, OnFileOperationTest) {
|
TEST_F(EventListenerTest, OnFileOperationTest) {
|
||||||
|
@ -13,17 +13,16 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "port/jemalloc_helper.h"
|
||||||
|
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
#ifdef ROCKSDB_JEMALLOC
|
#ifdef ROCKSDB_JEMALLOC
|
||||||
#ifdef __FreeBSD__
|
|
||||||
#include <malloc_np.h>
|
|
||||||
#else
|
|
||||||
#include "jemalloc/jemalloc.h"
|
|
||||||
#ifdef JEMALLOC_NO_RENAME
|
#ifdef JEMALLOC_NO_RENAME
|
||||||
#define malloc_stats_print je_malloc_stats_print
|
#define malloc_stats_print je_malloc_stats_print
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char* cur;
|
char* cur;
|
||||||
@ -41,10 +40,10 @@ static void GetJemallocStatus(void* mstat_arg, const char* status) {
|
|||||||
snprintf(mstat->cur, buf_size, "%s", status);
|
snprintf(mstat->cur, buf_size, "%s", status);
|
||||||
mstat->cur += status_len;
|
mstat->cur += status_len;
|
||||||
}
|
}
|
||||||
#endif // ROCKSDB_JEMALLOC
|
|
||||||
|
|
||||||
#ifdef ROCKSDB_JEMALLOC
|
|
||||||
void DumpMallocStats(std::string* stats) {
|
void DumpMallocStats(std::string* stats) {
|
||||||
|
if (!HasJemalloc()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
MallocStatus mstat;
|
MallocStatus mstat;
|
||||||
const unsigned int kMallocStatusLen = 1000000;
|
const unsigned int kMallocStatusLen = 1000000;
|
||||||
std::unique_ptr<char[]> buf{new char[kMallocStatusLen + 1]};
|
std::unique_ptr<char[]> buf{new char[kMallocStatusLen + 1]};
|
||||||
@ -56,5 +55,5 @@ void DumpMallocStats(std::string* stats) {
|
|||||||
#else
|
#else
|
||||||
void DumpMallocStats(std::string*) {}
|
void DumpMallocStats(std::string*) {}
|
||||||
#endif // ROCKSDB_JEMALLOC
|
#endif // ROCKSDB_JEMALLOC
|
||||||
}
|
} // namespace rocksdb
|
||||||
#endif // !ROCKSDB_LITE
|
#endif // !ROCKSDB_LITE
|
||||||
|
@ -428,7 +428,7 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIterator(
|
|||||||
comparator_.comparator);
|
comparator_.comparator);
|
||||||
|
|
||||||
auto* fragmented_iter = new FragmentedRangeTombstoneIterator(
|
auto* fragmented_iter = new FragmentedRangeTombstoneIterator(
|
||||||
fragmented_tombstone_list, read_seq, comparator_.comparator);
|
fragmented_tombstone_list, comparator_.comparator, read_seq);
|
||||||
return fragmented_iter;
|
return fragmented_iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -386,14 +386,16 @@ class MemTable {
|
|||||||
|
|
||||||
uint64_t GetID() const { return id_; }
|
uint64_t GetID() const { return id_; }
|
||||||
|
|
||||||
SequenceNumber& TEST_AtomicFlushSequenceNumber() {
|
void SetFlushCompleted(bool completed) { flush_completed_ = completed; }
|
||||||
return atomic_flush_seqno_;
|
|
||||||
|
uint64_t GetFileNumber() const { return file_number_; }
|
||||||
|
|
||||||
|
void SetFileNumber(uint64_t file_num) { file_number_ = file_num; }
|
||||||
|
|
||||||
|
void SetFlushInProgress(bool in_progress) {
|
||||||
|
flush_in_progress_ = in_progress;
|
||||||
}
|
}
|
||||||
|
|
||||||
void TEST_SetFlushCompleted(bool completed) { flush_completed_ = completed; }
|
|
||||||
|
|
||||||
void TEST_SetFileNumber(uint64_t file_num) { file_number_ = file_num; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum FlushStateEnum { FLUSH_NOT_REQUESTED, FLUSH_REQUESTED, FLUSH_SCHEDULED };
|
enum FlushStateEnum { FLUSH_NOT_REQUESTED, FLUSH_REQUESTED, FLUSH_SCHEDULED };
|
||||||
|
|
||||||
|
@ -159,7 +159,7 @@ bool MemTableListVersion::GetFromList(
|
|||||||
|
|
||||||
Status MemTableListVersion::AddRangeTombstoneIterators(
|
Status MemTableListVersion::AddRangeTombstoneIterators(
|
||||||
const ReadOptions& read_opts, Arena* /*arena*/,
|
const ReadOptions& read_opts, Arena* /*arena*/,
|
||||||
RangeDelAggregatorV2* range_del_agg) {
|
RangeDelAggregator* range_del_agg) {
|
||||||
assert(range_del_agg != nullptr);
|
assert(range_del_agg != nullptr);
|
||||||
for (auto& m : memlist_) {
|
for (auto& m : memlist_) {
|
||||||
// Using kMaxSequenceNumber is OK because these are immutable memtables.
|
// Using kMaxSequenceNumber is OK because these are immutable memtables.
|
||||||
@ -260,228 +260,6 @@ void MemTableListVersion::TrimHistory(autovector<MemTable*>* to_delete) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to record multiple successful flush to the MANIFEST as an atomic unit.
|
|
||||||
// This function may just return Status::OK if there has already been
|
|
||||||
// a concurrent thread performing actual recording.
|
|
||||||
Status MemTableList::TryInstallMemtableFlushResults(
|
|
||||||
autovector<MemTableList*>& imm_lists,
|
|
||||||
const autovector<ColumnFamilyData*>& cfds,
|
|
||||||
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
|
||||||
const autovector<const autovector<MemTable*>*>& mems_list,
|
|
||||||
bool* atomic_flush_commit_in_progress, LogsWithPrepTracker* prep_tracker,
|
|
||||||
VersionSet* vset, InstrumentedMutex* mu,
|
|
||||||
const autovector<FileMetaData>& file_metas,
|
|
||||||
autovector<MemTable*>* to_delete, Directory* db_directory,
|
|
||||||
LogBuffer* log_buffer) {
|
|
||||||
AutoThreadOperationStageUpdater stage_updater(
|
|
||||||
ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS);
|
|
||||||
mu->AssertHeld();
|
|
||||||
|
|
||||||
for (size_t k = 0; k != mems_list.size(); ++k) {
|
|
||||||
for (size_t i = 0; i != mems_list[k]->size(); ++i) {
|
|
||||||
assert(i == 0 || (*mems_list[k])[i]->GetEdits()->NumEntries() == 0);
|
|
||||||
(*mems_list[k])[i]->flush_completed_ = true;
|
|
||||||
(*mems_list[k])[i]->file_number_ = file_metas[k].fd.GetNumber();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(atomic_flush_commit_in_progress != nullptr);
|
|
||||||
Status s;
|
|
||||||
if (*atomic_flush_commit_in_progress) {
|
|
||||||
// If the function reaches here, there must be a concurrent thread that
|
|
||||||
// have already started recording to MANIFEST. Therefore we should just
|
|
||||||
// return Status::OK and let the othe thread finish writing to MANIFEST on
|
|
||||||
// our behalf.
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the function reaches here, the current thread will start writing to
|
|
||||||
// MANIFEST. It may record to MANIFEST the flush results of other flushes.
|
|
||||||
*atomic_flush_commit_in_progress = true;
|
|
||||||
|
|
||||||
auto comp = [&imm_lists](size_t lh, size_t rh) {
|
|
||||||
const auto& memlist1 = imm_lists[lh]->current_->memlist_;
|
|
||||||
const auto& memlist2 = imm_lists[rh]->current_->memlist_;
|
|
||||||
auto it1 = memlist1.rbegin();
|
|
||||||
auto it2 = memlist2.rbegin();
|
|
||||||
return (*it1)->atomic_flush_seqno_ > (*it2)->atomic_flush_seqno_;
|
|
||||||
};
|
|
||||||
// The top of the heap is the memtable with smallest atomic_flush_seqno_.
|
|
||||||
std::priority_queue<size_t, std::vector<size_t>, decltype(comp)> heap(comp);
|
|
||||||
// Sequence number of the oldest unfinished atomic flush.
|
|
||||||
SequenceNumber min_unfinished_seqno = kMaxSequenceNumber;
|
|
||||||
// Populate the heap with first element of each imm iff. it has been
|
|
||||||
// flushed to storage, i.e. flush_completed_ is true.
|
|
||||||
size_t num = imm_lists.size();
|
|
||||||
assert(num == cfds.size());
|
|
||||||
for (size_t i = 0; i != num; ++i) {
|
|
||||||
std::list<MemTable*>& memlist = imm_lists[i]->current_->memlist_;
|
|
||||||
if (memlist.empty()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
auto it = memlist.rbegin();
|
|
||||||
if ((*it)->flush_completed_) {
|
|
||||||
heap.emplace(i);
|
|
||||||
} else if (min_unfinished_seqno > (*it)->atomic_flush_seqno_) {
|
|
||||||
min_unfinished_seqno = (*it)->atomic_flush_seqno_;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
while (s.ok() && !heap.empty()) {
|
|
||||||
autovector<size_t> batch;
|
|
||||||
SequenceNumber seqno = kMaxSequenceNumber;
|
|
||||||
// Pop from the heap the memtables that belong to the same atomic flush,
|
|
||||||
// namely their atomic_flush_seqno_ are equal.
|
|
||||||
do {
|
|
||||||
size_t pos = heap.top();
|
|
||||||
const auto& memlist = imm_lists[pos]->current_->memlist_;
|
|
||||||
MemTable* mem = *(memlist.rbegin());
|
|
||||||
if (seqno == kMaxSequenceNumber) {
|
|
||||||
// First mem in this batch.
|
|
||||||
seqno = mem->atomic_flush_seqno_;
|
|
||||||
batch.emplace_back(pos);
|
|
||||||
heap.pop();
|
|
||||||
} else if (mem->atomic_flush_seqno_ == seqno) {
|
|
||||||
// mem has the same atomic_flush_seqno_, thus in the same atomic flush.
|
|
||||||
batch.emplace_back(pos);
|
|
||||||
heap.pop();
|
|
||||||
} else if (mem->atomic_flush_seqno_ > seqno) {
|
|
||||||
// mem belongs to another atomic flush with higher seqno, break the
|
|
||||||
// loop.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} while (!heap.empty());
|
|
||||||
if (seqno >= min_unfinished_seqno) {
|
|
||||||
// If there is an older, unfinished atomic flush, then we should not
|
|
||||||
// proceed.
|
|
||||||
TEST_SYNC_POINT_CALLBACK(
|
|
||||||
"MemTableList::TryInstallMemtableFlushResults:"
|
|
||||||
"HasOlderUnfinishedAtomicFlush:0",
|
|
||||||
nullptr);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Found the earliest, complete atomic flush. No earlier atomic flush is
|
|
||||||
// pending. Therefore ready to record it to the MANIFEST.
|
|
||||||
uint32_t num_entries = 0;
|
|
||||||
autovector<ColumnFamilyData*> tmp_cfds;
|
|
||||||
autovector<const MutableCFOptions*> tmp_mutable_cf_options_list;
|
|
||||||
std::vector<autovector<MemTable*>> memtables_to_flush;
|
|
||||||
autovector<autovector<VersionEdit*>> edit_lists;
|
|
||||||
for (auto pos : batch) {
|
|
||||||
tmp_cfds.emplace_back(cfds[pos]);
|
|
||||||
tmp_mutable_cf_options_list.emplace_back(mutable_cf_options_list[pos]);
|
|
||||||
const auto& memlist = imm_lists[pos]->current_->memlist_;
|
|
||||||
uint64_t batch_file_number = 0;
|
|
||||||
autovector<MemTable*> tmp_mems;
|
|
||||||
autovector<VersionEdit*> edits;
|
|
||||||
for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) {
|
|
||||||
MemTable* m = *it;
|
|
||||||
if (!m->flush_completed_ ||
|
|
||||||
(it != memlist.rbegin() && m->file_number_ != batch_file_number)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (it == memlist.rbegin()) {
|
|
||||||
batch_file_number = m->file_number_;
|
|
||||||
edits.push_back(m->GetEdits());
|
|
||||||
++num_entries;
|
|
||||||
}
|
|
||||||
tmp_mems.push_back(m);
|
|
||||||
}
|
|
||||||
edit_lists.push_back(edits);
|
|
||||||
memtables_to_flush.push_back(tmp_mems);
|
|
||||||
}
|
|
||||||
TEST_SYNC_POINT_CALLBACK(
|
|
||||||
"MemTableList::TryInstallMemtableFlushResults:FoundBatchToCommit:0",
|
|
||||||
&num_entries);
|
|
||||||
|
|
||||||
// Mark the version edits as an atomic group
|
|
||||||
uint32_t remaining = num_entries;
|
|
||||||
for (auto& edit_list : edit_lists) {
|
|
||||||
assert(edit_list.size() == 1);
|
|
||||||
edit_list[0]->MarkAtomicGroup(--remaining);
|
|
||||||
}
|
|
||||||
assert(remaining == 0);
|
|
||||||
|
|
||||||
size_t batch_sz = batch.size();
|
|
||||||
assert(batch_sz > 0);
|
|
||||||
assert(batch_sz == memtables_to_flush.size());
|
|
||||||
assert(batch_sz == tmp_cfds.size());
|
|
||||||
assert(batch_sz == edit_lists.size());
|
|
||||||
|
|
||||||
if (vset->db_options()->allow_2pc) {
|
|
||||||
for (size_t i = 0; i != batch_sz; ++i) {
|
|
||||||
auto& edit_list = edit_lists[i];
|
|
||||||
assert(!edit_list.empty());
|
|
||||||
edit_list.back()->SetMinLogNumberToKeep(
|
|
||||||
PrecomputeMinLogNumberToKeep(vset, *tmp_cfds[i], edit_list,
|
|
||||||
memtables_to_flush[i], prep_tracker));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// this can release and reacquire the mutex.
|
|
||||||
s = vset->LogAndApply(tmp_cfds, tmp_mutable_cf_options_list, edit_lists, mu,
|
|
||||||
db_directory);
|
|
||||||
|
|
||||||
for (const auto pos : batch) {
|
|
||||||
imm_lists[pos]->InstallNewVersion();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (s.ok()) {
|
|
||||||
for (size_t i = 0; i != batch_sz; ++i) {
|
|
||||||
if (tmp_cfds[i]->IsDropped()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
size_t pos = batch[i];
|
|
||||||
for (auto m : memtables_to_flush[i]) {
|
|
||||||
assert(m->file_number_ > 0);
|
|
||||||
uint64_t mem_id = m->GetID();
|
|
||||||
ROCKS_LOG_BUFFER(log_buffer,
|
|
||||||
"[%s] Level-0 commit table #%" PRIu64
|
|
||||||
": memtable #%" PRIu64 " done",
|
|
||||||
tmp_cfds[i]->GetName().c_str(), m->file_number_,
|
|
||||||
mem_id);
|
|
||||||
imm_lists[pos]->current_->Remove(m, to_delete);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (size_t i = 0; i != batch_sz; ++i) {
|
|
||||||
size_t pos = batch[i];
|
|
||||||
for (auto m : memtables_to_flush[i]) {
|
|
||||||
uint64_t mem_id = m->GetID();
|
|
||||||
ROCKS_LOG_BUFFER(log_buffer,
|
|
||||||
"[%s] Level-0 commit table #%" PRIu64
|
|
||||||
": memtable #%" PRIu64 " failed",
|
|
||||||
tmp_cfds[i]->GetName().c_str(), m->file_number_,
|
|
||||||
mem_id);
|
|
||||||
m->flush_completed_ = false;
|
|
||||||
m->flush_in_progress_ = false;
|
|
||||||
m->edit_.Clear();
|
|
||||||
m->file_number_ = 0;
|
|
||||||
imm_lists[pos]->num_flush_not_started_++;
|
|
||||||
}
|
|
||||||
imm_lists[pos]->imm_flush_needed.store(true, std::memory_order_release);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Adjust the heap AFTER installing new MemTableListVersions because the
|
|
||||||
// compare function 'comp' needs to capture the most up-to-date state of
|
|
||||||
// imm_lists.
|
|
||||||
for (auto pos : batch) {
|
|
||||||
const auto& memlist = imm_lists[pos]->current_->memlist_;
|
|
||||||
if (!memlist.empty()) {
|
|
||||||
MemTable* mem = *(memlist.rbegin());
|
|
||||||
if (mem->flush_completed_) {
|
|
||||||
heap.emplace(pos);
|
|
||||||
} else if (min_unfinished_seqno > mem->atomic_flush_seqno_) {
|
|
||||||
min_unfinished_seqno = mem->atomic_flush_seqno_;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*atomic_flush_commit_in_progress = false;
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns true if there is at least one memtable on which flush has
|
// Returns true if there is at least one memtable on which flush has
|
||||||
// not yet started.
|
// not yet started.
|
||||||
bool MemTableList::IsFlushPending() const {
|
bool MemTableList::IsFlushPending() const {
|
||||||
@ -749,4 +527,106 @@ uint64_t MemTableList::PrecomputeMinLogContainingPrepSection(
|
|||||||
return min_log;
|
return min_log;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Commit a successful atomic flush in the manifest file.
|
||||||
|
Status InstallMemtableAtomicFlushResults(
|
||||||
|
const autovector<MemTableList*>* imm_lists,
|
||||||
|
const autovector<ColumnFamilyData*>& cfds,
|
||||||
|
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
||||||
|
const autovector<const autovector<MemTable*>*>& mems_list, VersionSet* vset,
|
||||||
|
InstrumentedMutex* mu, const autovector<FileMetaData*>& file_metas,
|
||||||
|
autovector<MemTable*>* to_delete, Directory* db_directory,
|
||||||
|
LogBuffer* log_buffer) {
|
||||||
|
AutoThreadOperationStageUpdater stage_updater(
|
||||||
|
ThreadStatus::STAGE_MEMTABLE_INSTALL_FLUSH_RESULTS);
|
||||||
|
mu->AssertHeld();
|
||||||
|
|
||||||
|
size_t num = mems_list.size();
|
||||||
|
assert(cfds.size() == num);
|
||||||
|
if (imm_lists != nullptr) {
|
||||||
|
assert(imm_lists->size() == num);
|
||||||
|
}
|
||||||
|
for (size_t k = 0; k != num; ++k) {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
const auto* imm =
|
||||||
|
(imm_lists == nullptr) ? cfds[k]->imm() : imm_lists->at(k);
|
||||||
|
if (!mems_list[k]->empty()) {
|
||||||
|
assert((*mems_list[k])[0]->GetID() == imm->GetEarliestMemTableID());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
assert(nullptr != file_metas[k]);
|
||||||
|
for (size_t i = 0; i != mems_list[k]->size(); ++i) {
|
||||||
|
assert(i == 0 || (*mems_list[k])[i]->GetEdits()->NumEntries() == 0);
|
||||||
|
(*mems_list[k])[i]->SetFlushCompleted(true);
|
||||||
|
(*mems_list[k])[i]->SetFileNumber(file_metas[k]->fd.GetNumber());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Status s;
|
||||||
|
|
||||||
|
autovector<autovector<VersionEdit*>> edit_lists;
|
||||||
|
uint32_t num_entries = 0;
|
||||||
|
for (const auto mems : mems_list) {
|
||||||
|
assert(mems != nullptr);
|
||||||
|
autovector<VersionEdit*> edits;
|
||||||
|
assert(!mems->empty());
|
||||||
|
edits.emplace_back((*mems)[0]->GetEdits());
|
||||||
|
++num_entries;
|
||||||
|
edit_lists.emplace_back(edits);
|
||||||
|
}
|
||||||
|
// Mark the version edits as an atomic group
|
||||||
|
for (auto& edits : edit_lists) {
|
||||||
|
assert(edits.size() == 1);
|
||||||
|
edits[0]->MarkAtomicGroup(--num_entries);
|
||||||
|
}
|
||||||
|
assert(0 == num_entries);
|
||||||
|
|
||||||
|
// this can release and reacquire the mutex.
|
||||||
|
s = vset->LogAndApply(cfds, mutable_cf_options_list, edit_lists, mu,
|
||||||
|
db_directory);
|
||||||
|
|
||||||
|
for (size_t k = 0; k != cfds.size(); ++k) {
|
||||||
|
auto* imm = (imm_lists == nullptr) ? cfds[k]->imm() : imm_lists->at(k);
|
||||||
|
imm->InstallNewVersion();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s.ok() || s.IsShutdownInProgress()) {
|
||||||
|
for (size_t i = 0; i != cfds.size(); ++i) {
|
||||||
|
if (cfds[i]->IsDropped()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto* imm = (imm_lists == nullptr) ? cfds[i]->imm() : imm_lists->at(i);
|
||||||
|
for (auto m : *mems_list[i]) {
|
||||||
|
assert(m->GetFileNumber() > 0);
|
||||||
|
uint64_t mem_id = m->GetID();
|
||||||
|
ROCKS_LOG_BUFFER(log_buffer,
|
||||||
|
"[%s] Level-0 commit table #%" PRIu64
|
||||||
|
": memtable #%" PRIu64 " done",
|
||||||
|
cfds[i]->GetName().c_str(), m->GetFileNumber(),
|
||||||
|
mem_id);
|
||||||
|
imm->current_->Remove(m, to_delete);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (size_t i = 0; i != cfds.size(); ++i) {
|
||||||
|
auto* imm = (imm_lists == nullptr) ? cfds[i]->imm() : imm_lists->at(i);
|
||||||
|
for (auto m : *mems_list[i]) {
|
||||||
|
uint64_t mem_id = m->GetID();
|
||||||
|
ROCKS_LOG_BUFFER(log_buffer,
|
||||||
|
"[%s] Level-0 commit table #%" PRIu64
|
||||||
|
": memtable #%" PRIu64 " failed",
|
||||||
|
cfds[i]->GetName().c_str(), m->GetFileNumber(),
|
||||||
|
mem_id);
|
||||||
|
m->SetFlushCompleted(false);
|
||||||
|
m->SetFlushInProgress(false);
|
||||||
|
m->GetEdits()->Clear();
|
||||||
|
m->SetFileNumber(0);
|
||||||
|
imm->num_flush_not_started_++;
|
||||||
|
}
|
||||||
|
imm->imm_flush_needed.store(true, std::memory_order_release);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "db/logs_with_prep_tracker.h"
|
#include "db/logs_with_prep_tracker.h"
|
||||||
#include "db/memtable.h"
|
#include "db/memtable.h"
|
||||||
#include "db/range_del_aggregator_v2.h"
|
#include "db/range_del_aggregator.h"
|
||||||
#include "monitoring/instrumented_mutex.h"
|
#include "monitoring/instrumented_mutex.h"
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
#include "rocksdb/iterator.h"
|
#include "rocksdb/iterator.h"
|
||||||
@ -31,6 +31,7 @@ class ColumnFamilyData;
|
|||||||
class InternalKeyComparator;
|
class InternalKeyComparator;
|
||||||
class InstrumentedMutex;
|
class InstrumentedMutex;
|
||||||
class MergeIteratorBuilder;
|
class MergeIteratorBuilder;
|
||||||
|
class MemTableList;
|
||||||
|
|
||||||
// keeps a list of immutable memtables in a vector. the list is immutable
|
// keeps a list of immutable memtables in a vector. the list is immutable
|
||||||
// if refcount is bigger than one. It is used as a state for Get() and
|
// if refcount is bigger than one. It is used as a state for Get() and
|
||||||
@ -91,7 +92,7 @@ class MemTableListVersion {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status AddRangeTombstoneIterators(const ReadOptions& read_opts, Arena* arena,
|
Status AddRangeTombstoneIterators(const ReadOptions& read_opts, Arena* arena,
|
||||||
RangeDelAggregatorV2* range_del_agg);
|
RangeDelAggregator* range_del_agg);
|
||||||
|
|
||||||
void AddIterators(const ReadOptions& options,
|
void AddIterators(const ReadOptions& options,
|
||||||
std::vector<InternalIterator*>* iterator_list,
|
std::vector<InternalIterator*>* iterator_list,
|
||||||
@ -114,6 +115,18 @@ class MemTableListVersion {
|
|||||||
SequenceNumber GetEarliestSequenceNumber(bool include_history = false) const;
|
SequenceNumber GetEarliestSequenceNumber(bool include_history = false) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
friend class MemTableList;
|
||||||
|
|
||||||
|
friend Status InstallMemtableAtomicFlushResults(
|
||||||
|
const autovector<MemTableList*>* imm_lists,
|
||||||
|
const autovector<ColumnFamilyData*>& cfds,
|
||||||
|
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
||||||
|
const autovector<const autovector<MemTable*>*>& mems_list,
|
||||||
|
VersionSet* vset, InstrumentedMutex* mu,
|
||||||
|
const autovector<FileMetaData*>& file_meta,
|
||||||
|
autovector<MemTable*>* to_delete, Directory* db_directory,
|
||||||
|
LogBuffer* log_buffer);
|
||||||
|
|
||||||
// REQUIRE: m is an immutable memtable
|
// REQUIRE: m is an immutable memtable
|
||||||
void Add(MemTable* m, autovector<MemTable*>* to_delete);
|
void Add(MemTable* m, autovector<MemTable*>* to_delete);
|
||||||
// REQUIRE: m is an immutable memtable
|
// REQUIRE: m is an immutable memtable
|
||||||
@ -132,8 +145,6 @@ class MemTableListVersion {
|
|||||||
|
|
||||||
void UnrefMemTable(autovector<MemTable*>* to_delete, MemTable* m);
|
void UnrefMemTable(autovector<MemTable*>* to_delete, MemTable* m);
|
||||||
|
|
||||||
friend class MemTableList;
|
|
||||||
|
|
||||||
// Immutable MemTables that have not yet been flushed.
|
// Immutable MemTables that have not yet been flushed.
|
||||||
std::list<MemTable*> memlist_;
|
std::list<MemTable*> memlist_;
|
||||||
|
|
||||||
@ -163,18 +174,6 @@ class MemTableListVersion {
|
|||||||
// write thread.)
|
// write thread.)
|
||||||
class MemTableList {
|
class MemTableList {
|
||||||
public:
|
public:
|
||||||
// Commit a successful atomic flush in the manifest file
|
|
||||||
static Status TryInstallMemtableFlushResults(
|
|
||||||
autovector<MemTableList*>& imm_lists,
|
|
||||||
const autovector<ColumnFamilyData*>& cfds,
|
|
||||||
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
|
||||||
const autovector<const autovector<MemTable*>*>& mems_list,
|
|
||||||
bool* atomic_flush_commit_in_progress, LogsWithPrepTracker* prep_tracker,
|
|
||||||
VersionSet* vset, InstrumentedMutex* mu,
|
|
||||||
const autovector<FileMetaData>& file_meta,
|
|
||||||
autovector<MemTable*>* to_delete, Directory* db_directory,
|
|
||||||
LogBuffer* log_buffer);
|
|
||||||
|
|
||||||
// A list of memtables.
|
// A list of memtables.
|
||||||
explicit MemTableList(int min_write_buffer_number_to_merge,
|
explicit MemTableList(int min_write_buffer_number_to_merge,
|
||||||
int max_write_buffer_number_to_maintain)
|
int max_write_buffer_number_to_maintain)
|
||||||
@ -296,6 +295,16 @@ class MemTableList {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
friend Status InstallMemtableAtomicFlushResults(
|
||||||
|
const autovector<MemTableList*>* imm_lists,
|
||||||
|
const autovector<ColumnFamilyData*>& cfds,
|
||||||
|
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
||||||
|
const autovector<const autovector<MemTable*>*>& mems_list,
|
||||||
|
VersionSet* vset, InstrumentedMutex* mu,
|
||||||
|
const autovector<FileMetaData*>& file_meta,
|
||||||
|
autovector<MemTable*>* to_delete, Directory* db_directory,
|
||||||
|
LogBuffer* log_buffer);
|
||||||
|
|
||||||
// DB mutex held
|
// DB mutex held
|
||||||
void InstallNewVersion();
|
void InstallNewVersion();
|
||||||
|
|
||||||
@ -317,4 +326,18 @@ class MemTableList {
|
|||||||
size_t current_memory_usage_;
|
size_t current_memory_usage_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Installs memtable atomic flush results.
|
||||||
|
// In most cases, imm_lists is nullptr, and the function simply uses the
|
||||||
|
// immutable memtable lists associated with the cfds. There are unit tests that
|
||||||
|
// installs flush results for external immutable memtable lists other than the
|
||||||
|
// cfds' own immutable memtable lists, e.g. MemTableLIstTest. In this case,
|
||||||
|
// imm_lists parameter is not nullptr.
|
||||||
|
extern Status InstallMemtableAtomicFlushResults(
|
||||||
|
const autovector<MemTableList*>* imm_lists,
|
||||||
|
const autovector<ColumnFamilyData*>& cfds,
|
||||||
|
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
||||||
|
const autovector<const autovector<MemTable*>*>& mems_list, VersionSet* vset,
|
||||||
|
InstrumentedMutex* mu, const autovector<FileMetaData*>& file_meta,
|
||||||
|
autovector<MemTable*>* to_delete, Directory* db_directory,
|
||||||
|
LogBuffer* log_buffer);
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "db/merge_context.h"
|
#include "db/merge_context.h"
|
||||||
#include "db/range_del_aggregator.h"
|
|
||||||
#include "db/version_set.h"
|
#include "db/version_set.h"
|
||||||
#include "db/write_controller.h"
|
#include "db/write_controller.h"
|
||||||
#include "rocksdb/db.h"
|
#include "rocksdb/db.h"
|
||||||
@ -86,17 +85,46 @@ class MemTableListTest : public testing::Test {
|
|||||||
Status Mock_InstallMemtableFlushResults(
|
Status Mock_InstallMemtableFlushResults(
|
||||||
MemTableList* list, const MutableCFOptions& mutable_cf_options,
|
MemTableList* list, const MutableCFOptions& mutable_cf_options,
|
||||||
const autovector<MemTable*>& m, autovector<MemTable*>* to_delete) {
|
const autovector<MemTable*>& m, autovector<MemTable*>* to_delete) {
|
||||||
autovector<MemTableList*> lists;
|
// Create a mock Logger
|
||||||
lists.emplace_back(list);
|
test::NullLogger logger;
|
||||||
autovector<const autovector<MemTable*>*> mems_list;
|
LogBuffer log_buffer(DEBUG_LEVEL, &logger);
|
||||||
mems_list.emplace_back(&m);
|
|
||||||
return Mock_InstallMemtableFlushResults(
|
CreateDB();
|
||||||
lists, {0} /* cf_ids */, {&mutable_cf_options}, mems_list, to_delete);
|
// Create a mock VersionSet
|
||||||
|
DBOptions db_options;
|
||||||
|
ImmutableDBOptions immutable_db_options(db_options);
|
||||||
|
EnvOptions env_options;
|
||||||
|
std::shared_ptr<Cache> table_cache(NewLRUCache(50000, 16));
|
||||||
|
WriteBufferManager write_buffer_manager(db_options.db_write_buffer_size);
|
||||||
|
WriteController write_controller(10000000u);
|
||||||
|
|
||||||
|
VersionSet versions(dbname, &immutable_db_options, env_options,
|
||||||
|
table_cache.get(), &write_buffer_manager,
|
||||||
|
&write_controller);
|
||||||
|
std::vector<ColumnFamilyDescriptor> cf_descs;
|
||||||
|
cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions());
|
||||||
|
cf_descs.emplace_back("one", ColumnFamilyOptions());
|
||||||
|
cf_descs.emplace_back("two", ColumnFamilyOptions());
|
||||||
|
|
||||||
|
EXPECT_OK(versions.Recover(cf_descs, false));
|
||||||
|
|
||||||
|
// Create mock default ColumnFamilyData
|
||||||
|
auto column_family_set = versions.GetColumnFamilySet();
|
||||||
|
LogsWithPrepTracker dummy_prep_tracker;
|
||||||
|
auto cfd = column_family_set->GetDefault();
|
||||||
|
EXPECT_TRUE(nullptr != cfd);
|
||||||
|
uint64_t file_num = file_number.fetch_add(1);
|
||||||
|
// Create dummy mutex.
|
||||||
|
InstrumentedMutex mutex;
|
||||||
|
InstrumentedMutexLock l(&mutex);
|
||||||
|
return list->TryInstallMemtableFlushResults(
|
||||||
|
cfd, mutable_cf_options, m, &dummy_prep_tracker, &versions, &mutex,
|
||||||
|
file_num, to_delete, nullptr, &log_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calls MemTableList::InstallMemtableFlushResults() and sets up all
|
// Calls MemTableList::InstallMemtableFlushResults() and sets up all
|
||||||
// structures needed to call this function.
|
// structures needed to call this function.
|
||||||
Status Mock_InstallMemtableFlushResults(
|
Status Mock_InstallMemtableAtomicFlushResults(
|
||||||
autovector<MemTableList*>& lists, const autovector<uint32_t>& cf_ids,
|
autovector<MemTableList*>& lists, const autovector<uint32_t>& cf_ids,
|
||||||
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
|
||||||
const autovector<const autovector<MemTable*>*>& mems_list,
|
const autovector<const autovector<MemTable*>*>& mems_list,
|
||||||
@ -128,44 +156,28 @@ class MemTableListTest : public testing::Test {
|
|||||||
auto column_family_set = versions.GetColumnFamilySet();
|
auto column_family_set = versions.GetColumnFamilySet();
|
||||||
|
|
||||||
LogsWithPrepTracker dummy_prep_tracker;
|
LogsWithPrepTracker dummy_prep_tracker;
|
||||||
if (1 == cf_ids.size()) {
|
|
||||||
auto cfd = column_family_set->GetColumnFamily(cf_ids[0]);
|
|
||||||
EXPECT_TRUE(nullptr != cfd);
|
|
||||||
EXPECT_EQ(1, lists.size());
|
|
||||||
MemTableList* list = lists[0];
|
|
||||||
EXPECT_EQ(1, mutable_cf_options_list.size());
|
|
||||||
const MutableCFOptions& mutable_cf_options =
|
|
||||||
*(mutable_cf_options_list.at(0));
|
|
||||||
const autovector<MemTable*>* mems = mems_list.at(0);
|
|
||||||
EXPECT_TRUE(nullptr != mems);
|
|
||||||
|
|
||||||
uint64_t file_num = file_number.fetch_add(1);
|
|
||||||
// Create dummy mutex.
|
|
||||||
InstrumentedMutex mutex;
|
|
||||||
InstrumentedMutexLock l(&mutex);
|
|
||||||
return list->TryInstallMemtableFlushResults(
|
|
||||||
cfd, mutable_cf_options, *mems, &dummy_prep_tracker, &versions,
|
|
||||||
&mutex, file_num, to_delete, nullptr, &log_buffer);
|
|
||||||
}
|
|
||||||
autovector<ColumnFamilyData*> cfds;
|
autovector<ColumnFamilyData*> cfds;
|
||||||
for (int i = 0; i != static_cast<int>(cf_ids.size()); ++i) {
|
for (int i = 0; i != static_cast<int>(cf_ids.size()); ++i) {
|
||||||
cfds.emplace_back(column_family_set->GetColumnFamily(cf_ids[i]));
|
cfds.emplace_back(column_family_set->GetColumnFamily(cf_ids[i]));
|
||||||
EXPECT_NE(nullptr, cfds[i]);
|
EXPECT_NE(nullptr, cfds[i]);
|
||||||
}
|
}
|
||||||
autovector<FileMetaData> file_metas;
|
std::vector<FileMetaData> file_metas;
|
||||||
|
file_metas.reserve(cf_ids.size());
|
||||||
for (size_t i = 0; i != cf_ids.size(); ++i) {
|
for (size_t i = 0; i != cf_ids.size(); ++i) {
|
||||||
FileMetaData meta;
|
FileMetaData meta;
|
||||||
uint64_t file_num = file_number.fetch_add(1);
|
uint64_t file_num = file_number.fetch_add(1);
|
||||||
meta.fd = FileDescriptor(file_num, 0, 0);
|
meta.fd = FileDescriptor(file_num, 0, 0);
|
||||||
file_metas.emplace_back(meta);
|
file_metas.emplace_back(meta);
|
||||||
}
|
}
|
||||||
bool atomic_flush_commit_in_progress = false;
|
autovector<FileMetaData*> file_meta_ptrs;
|
||||||
|
for (auto& meta : file_metas) {
|
||||||
|
file_meta_ptrs.push_back(&meta);
|
||||||
|
}
|
||||||
InstrumentedMutex mutex;
|
InstrumentedMutex mutex;
|
||||||
InstrumentedMutexLock l(&mutex);
|
InstrumentedMutexLock l(&mutex);
|
||||||
return MemTableList::TryInstallMemtableFlushResults(
|
return InstallMemtableAtomicFlushResults(
|
||||||
lists, cfds, mutable_cf_options_list, mems_list,
|
&lists, cfds, mutable_cf_options_list, mems_list, &versions, &mutex,
|
||||||
&atomic_flush_commit_in_progress, &dummy_prep_tracker, &versions,
|
file_meta_ptrs, to_delete, nullptr, &log_buffer);
|
||||||
&mutex, file_metas, to_delete, nullptr, &log_buffer);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -731,18 +743,28 @@ TEST_F(MemTableListTest, FlushPendingTest) {
|
|||||||
to_delete.clear();
|
to_delete.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(MemTableListTest, FlushMultipleCFsTest) {
|
TEST_F(MemTableListTest, EmptyAtomicFlusTest) {
|
||||||
|
autovector<MemTableList*> lists;
|
||||||
|
autovector<uint32_t> cf_ids;
|
||||||
|
autovector<const MutableCFOptions*> options_list;
|
||||||
|
autovector<const autovector<MemTable*>*> to_flush;
|
||||||
|
autovector<MemTable*> to_delete;
|
||||||
|
Status s = Mock_InstallMemtableAtomicFlushResults(lists, cf_ids, options_list,
|
||||||
|
to_flush, &to_delete);
|
||||||
|
ASSERT_OK(s);
|
||||||
|
ASSERT_TRUE(to_delete.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(MemTableListTest, AtomicFlusTest) {
|
||||||
const int num_cfs = 3;
|
const int num_cfs = 3;
|
||||||
const int num_tables_per_cf = 5;
|
const int num_tables_per_cf = 2;
|
||||||
SequenceNumber seq = 1;
|
SequenceNumber seq = 1;
|
||||||
Status s;
|
|
||||||
|
|
||||||
auto factory = std::make_shared<SkipListFactory>();
|
auto factory = std::make_shared<SkipListFactory>();
|
||||||
options.memtable_factory = factory;
|
options.memtable_factory = factory;
|
||||||
ImmutableCFOptions ioptions(options);
|
ImmutableCFOptions ioptions(options);
|
||||||
InternalKeyComparator cmp(BytewiseComparator());
|
InternalKeyComparator cmp(BytewiseComparator());
|
||||||
WriteBufferManager wb(options.db_write_buffer_size);
|
WriteBufferManager wb(options.db_write_buffer_size);
|
||||||
autovector<MemTable*> to_delete;
|
|
||||||
|
|
||||||
// Create MemTableLists
|
// Create MemTableLists
|
||||||
int min_write_buffer_number_to_merge = 3;
|
int min_write_buffer_number_to_merge = 3;
|
||||||
@ -783,135 +805,72 @@ TEST_F(MemTableListTest, FlushMultipleCFsTest) {
|
|||||||
std::vector<autovector<MemTable*>> flush_candidates(num_cfs);
|
std::vector<autovector<MemTable*>> flush_candidates(num_cfs);
|
||||||
|
|
||||||
// Nothing to flush
|
// Nothing to flush
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (auto i = 0; i != num_cfs; ++i) {
|
||||||
auto list = lists[i];
|
auto* list = lists[i];
|
||||||
ASSERT_FALSE(list->IsFlushPending());
|
ASSERT_FALSE(list->IsFlushPending());
|
||||||
ASSERT_FALSE(list->imm_flush_needed.load(std::memory_order_acquire));
|
ASSERT_FALSE(list->imm_flush_needed.load(std::memory_order_acquire));
|
||||||
list->PickMemtablesToFlush(nullptr /* memtable_id */, &flush_candidates[i]);
|
list->PickMemtablesToFlush(nullptr /* memtable_id */, &flush_candidates[i]);
|
||||||
ASSERT_EQ(0, static_cast<int>(flush_candidates[i].size()));
|
ASSERT_EQ(0, flush_candidates[i].size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Request flush even though there is nothing to flush
|
// Request flush even though there is nothing to flush
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (auto i = 0; i != num_cfs; ++i) {
|
||||||
auto list = lists[i];
|
auto* list = lists[i];
|
||||||
list->FlushRequested();
|
list->FlushRequested();
|
||||||
ASSERT_FALSE(list->IsFlushPending());
|
ASSERT_FALSE(list->IsFlushPending());
|
||||||
ASSERT_FALSE(list->imm_flush_needed.load(std::memory_order_acquire));
|
ASSERT_FALSE(list->imm_flush_needed.load(std::memory_order_acquire));
|
||||||
}
|
}
|
||||||
|
autovector<MemTable*> to_delete;
|
||||||
// Add tables to column families
|
// Add tables to the immutable memtalbe lists associated with column families
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (auto i = 0; i != num_cfs; ++i) {
|
||||||
for (int j = 0; j != num_tables_per_cf; ++j) {
|
for (auto j = 0; j != num_tables_per_cf; ++j) {
|
||||||
lists[i]->Add(tables[i][j], &to_delete);
|
lists[i]->Add(tables[i][j], &to_delete);
|
||||||
}
|
}
|
||||||
ASSERT_EQ(num_tables_per_cf, lists[i]->NumNotFlushed());
|
ASSERT_EQ(num_tables_per_cf, lists[i]->NumNotFlushed());
|
||||||
ASSERT_TRUE(lists[i]->IsFlushPending());
|
ASSERT_TRUE(lists[i]->IsFlushPending());
|
||||||
ASSERT_TRUE(lists[i]->imm_flush_needed.load(std::memory_order_acquire));
|
ASSERT_TRUE(lists[i]->imm_flush_needed.load(std::memory_order_acquire));
|
||||||
}
|
}
|
||||||
|
std::vector<uint64_t> flush_memtable_ids = {1, 1, 0};
|
||||||
autovector<const autovector<MemTable*>*> to_flush;
|
|
||||||
std::vector<uint64_t> prev_memtable_ids;
|
|
||||||
// For each column family, determine the memtables to flush
|
|
||||||
for (int k = 0; k != 4; ++k) {
|
|
||||||
std::vector<uint64_t> flush_memtable_ids;
|
|
||||||
if (0 == k) {
|
|
||||||
// +----+
|
// +----+
|
||||||
// list[0]: |0 1| 2 3 4
|
// list[0]: |0 1|
|
||||||
// list[1]: |0 1| 2 3 4
|
// list[1]: |0 1|
|
||||||
// | +--+
|
// | +--+
|
||||||
// list[2]: |0| 1 2 3 4
|
// list[2]: |0| 1
|
||||||
// +-+
|
// +-+
|
||||||
flush_memtable_ids = {1, 1, 0};
|
|
||||||
} else if (1 == k) {
|
|
||||||
// +----+ +---+
|
|
||||||
// list[0]: |0 1| |2 3| 4
|
|
||||||
// list[1]: |0 1| |2 3| 4
|
|
||||||
// | +--+ +---+
|
|
||||||
// list[2]: |0| 1 2 3 4
|
|
||||||
// +-+
|
|
||||||
flush_memtable_ids = {3, 3, 0};
|
|
||||||
} else if (2 == k) {
|
|
||||||
// +-----+ +---+
|
|
||||||
// list[0]: |0 1| |2 3| 4
|
|
||||||
// list[1]: |0 1| |2 3| 4
|
|
||||||
// | +---+ +---+
|
|
||||||
// | | +-------+
|
|
||||||
// list[2]: |0| |1 2 3| 4
|
|
||||||
// +-+ +-------+
|
|
||||||
flush_memtable_ids = {3, 3, 3};
|
|
||||||
} else {
|
|
||||||
// +-----+ +---+ +-+
|
|
||||||
// list[0]: |0 1| |2 3| |4|
|
|
||||||
// list[1]: |0 1| |2 3| |4|
|
|
||||||
// | +---+ +---+ | |
|
|
||||||
// | | +-------+ | |
|
|
||||||
// list[2]: |0| |1 2 3| |4|
|
|
||||||
// +-+ +-------+ +-+
|
|
||||||
flush_memtable_ids = {4, 4, 4};
|
|
||||||
}
|
|
||||||
assert(num_cfs == static_cast<int>(flush_memtable_ids.size()));
|
|
||||||
|
|
||||||
// Pick memtables to flush
|
// Pick memtables to flush
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
for (auto i = 0; i != num_cfs; ++i) {
|
||||||
flush_candidates[i].clear();
|
flush_candidates[i].clear();
|
||||||
lists[i]->PickMemtablesToFlush(&flush_memtable_ids[i],
|
lists[i]->PickMemtablesToFlush(&flush_memtable_ids[i],
|
||||||
&flush_candidates[i]);
|
&flush_candidates[i]);
|
||||||
for (auto mem : flush_candidates[i]) {
|
ASSERT_EQ(flush_memtable_ids[i] - 0 + 1,
|
||||||
mem->TEST_AtomicFlushSequenceNumber() = SequenceNumber(k);
|
static_cast<uint64_t>(flush_candidates[i].size()));
|
||||||
}
|
}
|
||||||
if (prev_memtable_ids.empty()) {
|
autovector<MemTableList*> tmp_lists;
|
||||||
ASSERT_EQ(flush_memtable_ids[i] - 0 + 1, flush_candidates[i].size());
|
autovector<uint32_t> tmp_cf_ids;
|
||||||
} else {
|
autovector<const MutableCFOptions*> tmp_options_list;
|
||||||
ASSERT_EQ(flush_memtable_ids[i] - prev_memtable_ids[i],
|
autovector<const autovector<MemTable*>*> to_flush;
|
||||||
flush_candidates[i].size());
|
for (auto i = 0; i != num_cfs; ++i) {
|
||||||
}
|
if (!flush_candidates[i].empty()) {
|
||||||
ASSERT_EQ(num_tables_per_cf, lists[i]->NumNotFlushed());
|
to_flush.push_back(&flush_candidates[i]);
|
||||||
ASSERT_FALSE(lists[i]->HasFlushRequested());
|
tmp_lists.push_back(lists[i]);
|
||||||
if (flush_memtable_ids[i] == num_tables_per_cf - 1) {
|
tmp_cf_ids.push_back(i);
|
||||||
ASSERT_FALSE(
|
tmp_options_list.push_back(mutable_cf_options_list[i]);
|
||||||
lists[i]->imm_flush_needed.load(std::memory_order_acquire));
|
|
||||||
} else {
|
|
||||||
ASSERT_TRUE(lists[i]->imm_flush_needed.load(std::memory_order_acquire));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
prev_memtable_ids = flush_memtable_ids;
|
Status s = Mock_InstallMemtableAtomicFlushResults(
|
||||||
|
tmp_lists, tmp_cf_ids, tmp_options_list, to_flush, &to_delete);
|
||||||
if (k < 3) {
|
|
||||||
for (const auto& mems : flush_candidates) {
|
|
||||||
uint64_t file_num = file_number.fetch_add(1);
|
|
||||||
for (auto m : mems) {
|
|
||||||
m->TEST_SetFlushCompleted(true);
|
|
||||||
m->TEST_SetFileNumber(file_num);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (k == 0) {
|
|
||||||
// Rollback first pick of tables
|
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
|
||||||
auto list = lists[i];
|
|
||||||
const auto& mems = flush_candidates[i];
|
|
||||||
for (auto m : mems) {
|
|
||||||
m->TEST_SetFileNumber(0);
|
|
||||||
}
|
|
||||||
list->RollbackMemtableFlush(flush_candidates[i], 0);
|
|
||||||
ASSERT_TRUE(list->IsFlushPending());
|
|
||||||
ASSERT_TRUE(list->imm_flush_needed.load(std::memory_order_acquire));
|
|
||||||
}
|
|
||||||
prev_memtable_ids.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (k == 3) {
|
|
||||||
for (int i = 0; i != num_cfs; ++i) {
|
|
||||||
to_flush.emplace_back(&flush_candidates[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
s = Mock_InstallMemtableFlushResults(lists, cf_ids, mutable_cf_options_list,
|
|
||||||
to_flush, &to_delete);
|
|
||||||
ASSERT_OK(s);
|
ASSERT_OK(s);
|
||||||
|
|
||||||
|
for (auto i = 0; i != num_cfs; ++i) {
|
||||||
|
for (auto j = 0; j != num_tables_per_cf; ++j) {
|
||||||
|
if (static_cast<uint64_t>(j) <= flush_memtable_ids[i]) {
|
||||||
|
ASSERT_LT(0, tables[i][j]->GetFileNumber());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ASSERT_EQ(
|
||||||
|
static_cast<size_t>(num_tables_per_cf) - flush_candidates[i].size(),
|
||||||
|
lists[i]->NumNotFlushed());
|
||||||
|
}
|
||||||
|
|
||||||
to_delete.clear();
|
to_delete.clear();
|
||||||
for (auto list : lists) {
|
for (auto list : lists) {
|
||||||
list->current()->Unref(&to_delete);
|
list->current()->Unref(&to_delete);
|
||||||
@ -933,126 +892,6 @@ TEST_F(MemTableListTest, FlushMultipleCFsTest) {
|
|||||||
ASSERT_EQ(m, m->Unref());
|
ASSERT_EQ(m, m->Unref());
|
||||||
delete m;
|
delete m;
|
||||||
}
|
}
|
||||||
to_delete.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(MemTableListTest, HasOlderAtomicFlush) {
|
|
||||||
const size_t num_cfs = 3;
|
|
||||||
const size_t num_memtables_per_cf = 2;
|
|
||||||
SequenceNumber seq = 1;
|
|
||||||
Status s;
|
|
||||||
|
|
||||||
auto factory = std::make_shared<SkipListFactory>();
|
|
||||||
options.memtable_factory = factory;
|
|
||||||
ImmutableCFOptions ioptions(options);
|
|
||||||
InternalKeyComparator cmp(BytewiseComparator());
|
|
||||||
WriteBufferManager wb(options.db_write_buffer_size);
|
|
||||||
autovector<MemTable*> to_delete;
|
|
||||||
|
|
||||||
// Create MemTableLists
|
|
||||||
int min_write_buffer_number_to_merge = 3;
|
|
||||||
int max_write_buffer_number_to_maintain = 7;
|
|
||||||
autovector<MemTableList*> lists;
|
|
||||||
for (size_t i = 0; i != num_cfs; ++i) {
|
|
||||||
lists.emplace_back(new MemTableList(min_write_buffer_number_to_merge,
|
|
||||||
max_write_buffer_number_to_maintain));
|
|
||||||
}
|
|
||||||
|
|
||||||
autovector<uint32_t> cf_ids;
|
|
||||||
std::vector<std::vector<MemTable*>> tables;
|
|
||||||
autovector<const MutableCFOptions*> mutable_cf_options_list;
|
|
||||||
uint32_t cf_id = 0;
|
|
||||||
for (size_t k = 0; k != num_cfs; ++k) {
|
|
||||||
std::vector<MemTable*> elem;
|
|
||||||
mutable_cf_options_list.emplace_back(new MutableCFOptions(options));
|
|
||||||
uint64_t memtable_id = 0;
|
|
||||||
for (int i = 0; i != num_memtables_per_cf; ++i) {
|
|
||||||
MemTable* mem =
|
|
||||||
new MemTable(cmp, ioptions, *(mutable_cf_options_list.back()), &wb,
|
|
||||||
kMaxSequenceNumber, cf_id);
|
|
||||||
mem->SetID(memtable_id++);
|
|
||||||
mem->Ref();
|
|
||||||
|
|
||||||
std::string value;
|
|
||||||
|
|
||||||
mem->Add(++seq, kTypeValue, "key1", ToString(i));
|
|
||||||
mem->Add(++seq, kTypeValue, "keyN" + ToString(i), "valueN");
|
|
||||||
mem->Add(++seq, kTypeValue, "keyX" + ToString(i), "value");
|
|
||||||
mem->Add(++seq, kTypeValue, "keyM" + ToString(i), "valueM");
|
|
||||||
mem->Add(++seq, kTypeDeletion, "keyX" + ToString(i), "");
|
|
||||||
|
|
||||||
elem.push_back(mem);
|
|
||||||
}
|
|
||||||
tables.emplace_back(elem);
|
|
||||||
cf_ids.push_back(cf_id++);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add tables to column families' immutable memtable lists
|
|
||||||
for (size_t i = 0; i != num_cfs; ++i) {
|
|
||||||
for (size_t j = 0; j != num_memtables_per_cf; ++j) {
|
|
||||||
lists[i]->Add(tables[i][j], &to_delete);
|
|
||||||
}
|
|
||||||
lists[i]->FlushRequested();
|
|
||||||
ASSERT_EQ(num_memtables_per_cf, lists[i]->NumNotFlushed());
|
|
||||||
ASSERT_TRUE(lists[i]->IsFlushPending());
|
|
||||||
ASSERT_TRUE(lists[i]->imm_flush_needed.load(std::memory_order_acquire));
|
|
||||||
}
|
|
||||||
std::vector<autovector<MemTable*>> flush_candidates(num_cfs);
|
|
||||||
for (size_t i = 0; i != num_cfs; ++i) {
|
|
||||||
lists[i]->PickMemtablesToFlush(nullptr, &flush_candidates[i]);
|
|
||||||
for (auto m : flush_candidates[i]) {
|
|
||||||
m->TEST_AtomicFlushSequenceNumber() = 123;
|
|
||||||
}
|
|
||||||
lists[i]->RollbackMemtableFlush(flush_candidates[i], 0);
|
|
||||||
}
|
|
||||||
uint64_t memtable_id = num_memtables_per_cf - 1;
|
|
||||||
autovector<MemTable*> other_flush_candidates;
|
|
||||||
lists[0]->PickMemtablesToFlush(&memtable_id, &other_flush_candidates);
|
|
||||||
for (auto m : other_flush_candidates) {
|
|
||||||
m->TEST_AtomicFlushSequenceNumber() = 124;
|
|
||||||
m->TEST_SetFlushCompleted(true);
|
|
||||||
m->TEST_SetFileNumber(1);
|
|
||||||
}
|
|
||||||
autovector<const autovector<MemTable*>*> to_flush;
|
|
||||||
to_flush.emplace_back(&other_flush_candidates);
|
|
||||||
bool has_older_unfinished_atomic_flush = false;
|
|
||||||
bool found_batch_to_commit = false;
|
|
||||||
|
|
||||||
SyncPoint::GetInstance()->SetCallBack(
|
|
||||||
"MemTableList::TryInstallMemtableFlushResults:"
|
|
||||||
"HasOlderUnfinishedAtomicFlush:0",
|
|
||||||
[&](void* /*arg*/) { has_older_unfinished_atomic_flush = true; });
|
|
||||||
SyncPoint::GetInstance()->SetCallBack(
|
|
||||||
"MemTableList::TryInstallMemtableFlushResults:FoundBatchToCommit:0",
|
|
||||||
[&](void* /*arg*/) { found_batch_to_commit = true; });
|
|
||||||
SyncPoint::GetInstance()->EnableProcessing();
|
|
||||||
|
|
||||||
s = Mock_InstallMemtableFlushResults(lists, cf_ids, mutable_cf_options_list,
|
|
||||||
to_flush, &to_delete);
|
|
||||||
ASSERT_OK(s);
|
|
||||||
ASSERT_TRUE(has_older_unfinished_atomic_flush);
|
|
||||||
ASSERT_FALSE(found_batch_to_commit);
|
|
||||||
|
|
||||||
SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
||||||
|
|
||||||
ASSERT_TRUE(to_delete.empty());
|
|
||||||
for (auto list : lists) {
|
|
||||||
list->current()->Unref(&to_delete);
|
|
||||||
delete list;
|
|
||||||
}
|
|
||||||
lists.clear();
|
|
||||||
ASSERT_EQ(num_cfs * num_memtables_per_cf, to_delete.size());
|
|
||||||
for (auto m : to_delete) {
|
|
||||||
m->Ref();
|
|
||||||
ASSERT_EQ(m, m->Unref());
|
|
||||||
delete m;
|
|
||||||
}
|
|
||||||
to_delete.clear();
|
|
||||||
for (auto& opts : mutable_cf_options_list) {
|
|
||||||
delete opts;
|
|
||||||
opts = nullptr;
|
|
||||||
}
|
|
||||||
mutable_cf_options_list.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -79,7 +79,8 @@ class MergeContext {
|
|||||||
return GetOperandsDirectionForward();
|
return GetOperandsDirectionForward();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return all the operands in the order as they were merged (passed to FullMerge or FullMergeV2)
|
// Return all the operands in the order as they were merged (passed to
|
||||||
|
// FullMerge or FullMergeV2)
|
||||||
const std::vector<Slice>& GetOperandsDirectionForward() {
|
const std::vector<Slice>& GetOperandsDirectionForward() {
|
||||||
if (!operand_list_) {
|
if (!operand_list_) {
|
||||||
return empty_operand_list;
|
return empty_operand_list;
|
||||||
@ -89,7 +90,8 @@ class MergeContext {
|
|||||||
return *operand_list_;
|
return *operand_list_;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return all the operands in the reversed order relative to how they were merged (passed to FullMerge or FullMergeV2)
|
// Return all the operands in the reversed order relative to how they were
|
||||||
|
// merged (passed to FullMerge or FullMergeV2)
|
||||||
const std::vector<Slice>& GetOperandsDirectionBackward() {
|
const std::vector<Slice>& GetOperandsDirectionBackward() {
|
||||||
if (!operand_list_) {
|
if (!operand_list_) {
|
||||||
return empty_operand_list;
|
return empty_operand_list;
|
||||||
|
@ -110,8 +110,11 @@ Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
|
|||||||
// keys_ stores the list of keys encountered while merging.
|
// keys_ stores the list of keys encountered while merging.
|
||||||
// operands_ stores the list of merge operands encountered while merging.
|
// operands_ stores the list of merge operands encountered while merging.
|
||||||
// keys_[i] corresponds to operands_[i] for each i.
|
// keys_[i] corresponds to operands_[i] for each i.
|
||||||
|
//
|
||||||
|
// TODO: Avoid the snapshot stripe map lookup in CompactionRangeDelAggregator
|
||||||
|
// and just pass the StripeRep corresponding to the stripe being merged.
|
||||||
Status MergeHelper::MergeUntil(InternalIterator* iter,
|
Status MergeHelper::MergeUntil(InternalIterator* iter,
|
||||||
RangeDelAggregator* range_del_agg,
|
CompactionRangeDelAggregator* range_del_agg,
|
||||||
const SequenceNumber stop_before,
|
const SequenceNumber stop_before,
|
||||||
const bool at_bottom) {
|
const bool at_bottom) {
|
||||||
// Get a copy of the internal key, before it's invalidated by iter->Next()
|
// Get a copy of the internal key, before it's invalidated by iter->Next()
|
||||||
|
@ -78,7 +78,7 @@ class MergeHelper {
|
|||||||
//
|
//
|
||||||
// REQUIRED: The first key in the input is not corrupted.
|
// REQUIRED: The first key in the input is not corrupted.
|
||||||
Status MergeUntil(InternalIterator* iter,
|
Status MergeUntil(InternalIterator* iter,
|
||||||
RangeDelAggregator* range_del_agg = nullptr,
|
CompactionRangeDelAggregator* range_del_agg = nullptr,
|
||||||
const SequenceNumber stop_before = 0,
|
const SequenceNumber stop_before = 0,
|
||||||
const bool at_bottom = false);
|
const bool at_bottom = false);
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,12 @@
|
|||||||
// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
|
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
|
||||||
// This source code is licensed under both the GPLv2 (found in the
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
// COPYING file in the root directory) and Apache 2.0 License
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
// (found in the LICENSE.Apache file in the root directory).
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
@ -14,220 +16,416 @@
|
|||||||
#include "db/compaction_iteration_stats.h"
|
#include "db/compaction_iteration_stats.h"
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "db/pinned_iterators_manager.h"
|
#include "db/pinned_iterators_manager.h"
|
||||||
|
#include "db/range_del_aggregator.h"
|
||||||
|
#include "db/range_tombstone_fragmenter.h"
|
||||||
#include "db/version_edit.h"
|
#include "db/version_edit.h"
|
||||||
#include "include/rocksdb/comparator.h"
|
#include "include/rocksdb/comparator.h"
|
||||||
#include "include/rocksdb/types.h"
|
#include "include/rocksdb/types.h"
|
||||||
#include "table/internal_iterator.h"
|
#include "table/internal_iterator.h"
|
||||||
#include "table/scoped_arena_iterator.h"
|
#include "table/scoped_arena_iterator.h"
|
||||||
#include "table/table_builder.h"
|
#include "table/table_builder.h"
|
||||||
|
#include "util/heap.h"
|
||||||
#include "util/kv_map.h"
|
#include "util/kv_map.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
// RangeDelMaps maintain position across calls to ShouldDelete. The caller may
|
class TruncatedRangeDelIterator {
|
||||||
// wish to specify a mode to optimize positioning the iterator during the next
|
|
||||||
// call to ShouldDelete. The non-kFullScan modes are only available when
|
|
||||||
// deletion collapsing is enabled.
|
|
||||||
//
|
|
||||||
// For example, if we invoke Next() on an iterator, kForwardTraversal should be
|
|
||||||
// specified to advance one-by-one through deletions until one is found with its
|
|
||||||
// interval containing the key. This will typically be faster than doing a full
|
|
||||||
// binary search (kBinarySearch).
|
|
||||||
enum class RangeDelPositioningMode {
|
|
||||||
kFullScan, // used iff collapse_deletions_ == false
|
|
||||||
kForwardTraversal,
|
|
||||||
kBackwardTraversal,
|
|
||||||
kBinarySearch,
|
|
||||||
};
|
|
||||||
|
|
||||||
// TruncatedRangeTombstones are a slight generalization of regular
|
|
||||||
// RangeTombstones that can represent truncations caused by SST boundaries.
|
|
||||||
// Instead of using user keys to represent the start and end keys, they instead
|
|
||||||
// use internal keys, whose sequence number indicates the sequence number of
|
|
||||||
// the smallest/largest SST key (in the case where a tombstone is untruncated,
|
|
||||||
// the sequence numbers will be kMaxSequenceNumber for both start and end
|
|
||||||
// keys). Like RangeTombstones, TruncatedRangeTombstone are also
|
|
||||||
// end-key-exclusive.
|
|
||||||
struct TruncatedRangeTombstone {
|
|
||||||
TruncatedRangeTombstone(const ParsedInternalKey& sk,
|
|
||||||
const ParsedInternalKey& ek, SequenceNumber s)
|
|
||||||
: start_key_(sk), end_key_(ek), seq_(s) {}
|
|
||||||
|
|
||||||
RangeTombstone Tombstone() const {
|
|
||||||
// The RangeTombstone returned here can cover less than the
|
|
||||||
// TruncatedRangeTombstone when its end key has a seqnum that is not
|
|
||||||
// kMaxSequenceNumber. Since this method is only used by RangeDelIterators
|
|
||||||
// (which in turn are only used during flush/compaction), we avoid this
|
|
||||||
// problem by using truncation boundaries spanning multiple SSTs, which
|
|
||||||
// are selected in a way that guarantee a clean break at the end key.
|
|
||||||
assert(end_key_.sequence == kMaxSequenceNumber);
|
|
||||||
return RangeTombstone(start_key_.user_key, end_key_.user_key, seq_);
|
|
||||||
}
|
|
||||||
|
|
||||||
ParsedInternalKey start_key_;
|
|
||||||
ParsedInternalKey end_key_;
|
|
||||||
SequenceNumber seq_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// A RangeDelIterator iterates over range deletion tombstones.
|
|
||||||
class RangeDelIterator {
|
|
||||||
public:
|
public:
|
||||||
virtual ~RangeDelIterator() = default;
|
TruncatedRangeDelIterator(
|
||||||
|
std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
|
||||||
|
const InternalKeyComparator* icmp, const InternalKey* smallest,
|
||||||
|
const InternalKey* largest);
|
||||||
|
|
||||||
virtual bool Valid() const = 0;
|
bool Valid() const;
|
||||||
virtual void Next() = 0;
|
|
||||||
// NOTE: the Slice passed to this method must be a user key.
|
|
||||||
virtual void Seek(const Slice& target) = 0;
|
|
||||||
virtual void Seek(const ParsedInternalKey& target) = 0;
|
|
||||||
virtual RangeTombstone Tombstone() const = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
// A RangeDelMap keeps track of range deletion tombstones within a snapshot
|
void Next();
|
||||||
// stripe.
|
void Prev();
|
||||||
//
|
|
||||||
// RangeDelMaps are used internally by RangeDelAggregator. They are not intended
|
|
||||||
// to be used directly.
|
|
||||||
class RangeDelMap {
|
|
||||||
public:
|
|
||||||
virtual ~RangeDelMap() = default;
|
|
||||||
|
|
||||||
virtual bool ShouldDelete(const ParsedInternalKey& parsed,
|
void InternalNext();
|
||||||
RangeDelPositioningMode mode) = 0;
|
|
||||||
virtual bool IsRangeOverlapped(const ParsedInternalKey& start,
|
|
||||||
const ParsedInternalKey& end) = 0;
|
|
||||||
virtual void InvalidatePosition() = 0;
|
|
||||||
|
|
||||||
virtual size_t Size() const = 0;
|
// Seeks to the tombstone with the highest viisble sequence number that covers
|
||||||
bool IsEmpty() const { return Size() == 0; }
|
// target (a user key). If no such tombstone exists, the position will be at
|
||||||
|
// the earliest tombstone that ends after target.
|
||||||
|
void Seek(const Slice& target);
|
||||||
|
|
||||||
virtual void AddTombstone(TruncatedRangeTombstone tombstone) = 0;
|
// Seeks to the tombstone with the highest viisble sequence number that covers
|
||||||
virtual std::unique_ptr<RangeDelIterator> NewIterator() = 0;
|
// target (a user key). If no such tombstone exists, the position will be at
|
||||||
};
|
// the latest tombstone that starts before target.
|
||||||
|
void SeekForPrev(const Slice& target);
|
||||||
|
|
||||||
// A RangeDelAggregator aggregates range deletion tombstones as they are
|
void SeekToFirst();
|
||||||
// encountered in memtables/SST files. It provides methods that check whether a
|
void SeekToLast();
|
||||||
// key is covered by range tombstones or write the relevant tombstones to a new
|
|
||||||
// SST file.
|
|
||||||
class RangeDelAggregator {
|
|
||||||
public:
|
|
||||||
// @param snapshots These are used to organize the tombstones into snapshot
|
|
||||||
// stripes, which is the seqnum range between consecutive snapshots,
|
|
||||||
// including the higher snapshot and excluding the lower one. Currently,
|
|
||||||
// this is used by ShouldDelete() to prevent deletion of keys that are
|
|
||||||
// covered by range tombstones in other snapshot stripes. This constructor
|
|
||||||
// is used for writes (flush/compaction). All DB snapshots are provided
|
|
||||||
// such that no keys are removed that are uncovered according to any DB
|
|
||||||
// snapshot.
|
|
||||||
// Note this overload does not lazily initialize Rep.
|
|
||||||
RangeDelAggregator(const InternalKeyComparator& icmp,
|
|
||||||
const std::vector<SequenceNumber>& snapshots,
|
|
||||||
bool collapse_deletions = true);
|
|
||||||
|
|
||||||
// @param upper_bound Similar to snapshots above, except with a single
|
ParsedInternalKey start_key() const {
|
||||||
// snapshot, which allows us to store the snapshot on the stack and defer
|
return (smallest_ == nullptr ||
|
||||||
// initialization of heap-allocating members (in Rep) until the first range
|
icmp_->Compare(*smallest_, iter_->parsed_start_key()) <= 0)
|
||||||
// deletion is encountered. This constructor is used in case of reads (get/
|
? iter_->parsed_start_key()
|
||||||
// iterator), for which only the user snapshot (upper_bound) is provided
|
: *smallest_;
|
||||||
// such that the seqnum space is divided into two stripes. Only the older
|
|
||||||
// stripe will be used by ShouldDelete().
|
|
||||||
RangeDelAggregator(const InternalKeyComparator& icmp,
|
|
||||||
SequenceNumber upper_bound,
|
|
||||||
bool collapse_deletions = false);
|
|
||||||
|
|
||||||
// Returns whether the key should be deleted, which is the case when it is
|
|
||||||
// covered by a range tombstone residing in the same snapshot stripe.
|
|
||||||
// @param mode If collapse_deletions_ is true, this dictates how we will find
|
|
||||||
// the deletion whose interval contains this key. Otherwise, its
|
|
||||||
// value must be kFullScan indicating linear scan from beginning.
|
|
||||||
bool ShouldDelete(
|
|
||||||
const ParsedInternalKey& parsed,
|
|
||||||
RangeDelPositioningMode mode = RangeDelPositioningMode::kFullScan) {
|
|
||||||
if (rep_ == nullptr) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
return ShouldDeleteImpl(parsed, mode);
|
|
||||||
|
ParsedInternalKey end_key() const {
|
||||||
|
return (largest_ == nullptr ||
|
||||||
|
icmp_->Compare(iter_->parsed_end_key(), *largest_) <= 0)
|
||||||
|
? iter_->parsed_end_key()
|
||||||
|
: *largest_;
|
||||||
}
|
}
|
||||||
bool ShouldDelete(
|
|
||||||
const Slice& internal_key,
|
|
||||||
RangeDelPositioningMode mode = RangeDelPositioningMode::kFullScan) {
|
|
||||||
if (rep_ == nullptr) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return ShouldDeleteImpl(internal_key, mode);
|
|
||||||
}
|
|
||||||
bool ShouldDeleteImpl(const ParsedInternalKey& parsed,
|
|
||||||
RangeDelPositioningMode mode);
|
|
||||||
bool ShouldDeleteImpl(const Slice& internal_key,
|
|
||||||
RangeDelPositioningMode mode);
|
|
||||||
|
|
||||||
// Checks whether range deletions cover any keys between `start` and `end`,
|
SequenceNumber seq() const { return iter_->seq(); }
|
||||||
// inclusive.
|
|
||||||
//
|
|
||||||
// @param start User key representing beginning of range to check for overlap.
|
|
||||||
// @param end User key representing end of range to check for overlap. This
|
|
||||||
// argument is inclusive, so the existence of a range deletion covering
|
|
||||||
// `end` causes this to return true.
|
|
||||||
bool IsRangeOverlapped(const Slice& start, const Slice& end);
|
|
||||||
|
|
||||||
// Adds tombstones to the tombstone aggregation structure maintained by this
|
std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
|
||||||
// object. Tombstones are truncated to smallest and largest. If smallest (or
|
SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);
|
||||||
// largest) is null, it is not used for truncation. When adding range
|
|
||||||
// tombstones present in an sstable, smallest and largest should be set to
|
|
||||||
// the smallest and largest keys from the sstable file metadata. Note that
|
|
||||||
// tombstones end keys are exclusive while largest is inclusive.
|
|
||||||
// @return non-OK status if any of the tombstone keys are corrupted.
|
|
||||||
Status AddTombstones(std::unique_ptr<InternalIterator> input,
|
|
||||||
const InternalKey* smallest = nullptr,
|
|
||||||
const InternalKey* largest = nullptr);
|
|
||||||
|
|
||||||
// Resets iterators maintained across calls to ShouldDelete(). This may be
|
SequenceNumber upper_bound() const { return iter_->upper_bound(); }
|
||||||
// called when the tombstones change, or the owner may call explicitly, e.g.,
|
|
||||||
// if it's an iterator that just seeked to an arbitrary position. The effect
|
|
||||||
// of invalidation is that the following call to ShouldDelete() will binary
|
|
||||||
// search for its tombstone.
|
|
||||||
void InvalidateRangeDelMapPositions();
|
|
||||||
|
|
||||||
bool IsEmpty();
|
SequenceNumber lower_bound() const { return iter_->lower_bound(); }
|
||||||
bool AddFile(uint64_t file_number);
|
|
||||||
|
|
||||||
// Create a new iterator over the range deletion tombstones in all of the
|
|
||||||
// snapshot stripes in this aggregator. Tombstones are presented in start key
|
|
||||||
// order. Tombstones with the same start key are presented in arbitrary order.
|
|
||||||
//
|
|
||||||
// The iterator is invalidated after any call to AddTombstones. It is the
|
|
||||||
// caller's responsibility to avoid using invalid iterators.
|
|
||||||
std::unique_ptr<RangeDelIterator> NewIterator();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Maps snapshot seqnum -> map of tombstones that fall in that stripe, i.e.,
|
std::unique_ptr<FragmentedRangeTombstoneIterator> iter_;
|
||||||
// their seqnums are greater than the next smaller snapshot's seqnum, and the
|
const InternalKeyComparator* icmp_;
|
||||||
// corresponding index into the list of snapshots. Each entry is lazily
|
const ParsedInternalKey* smallest_ = nullptr;
|
||||||
// initialized.
|
const ParsedInternalKey* largest_ = nullptr;
|
||||||
typedef std::map<SequenceNumber,
|
std::list<ParsedInternalKey> pinned_bounds_;
|
||||||
std::pair<std::unique_ptr<RangeDelMap>, size_t>>
|
|
||||||
StripeMap;
|
|
||||||
|
|
||||||
struct Rep {
|
const InternalKey* smallest_ikey_;
|
||||||
std::vector<SequenceNumber> snapshots_;
|
const InternalKey* largest_ikey_;
|
||||||
StripeMap stripe_map_;
|
};
|
||||||
PinnedIteratorsManager pinned_iters_mgr_;
|
|
||||||
std::list<std::string> pinned_slices_;
|
struct SeqMaxComparator {
|
||||||
std::set<uint64_t> added_files_;
|
bool operator()(const TruncatedRangeDelIterator* a,
|
||||||
|
const TruncatedRangeDelIterator* b) const {
|
||||||
|
return a->seq() > b->seq();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct StartKeyMinComparator {
|
||||||
|
explicit StartKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}
|
||||||
|
|
||||||
|
bool operator()(const TruncatedRangeDelIterator* a,
|
||||||
|
const TruncatedRangeDelIterator* b) const {
|
||||||
|
return icmp->Compare(a->start_key(), b->start_key()) > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const InternalKeyComparator* icmp;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ForwardRangeDelIterator {
|
||||||
|
public:
|
||||||
|
explicit ForwardRangeDelIterator(const InternalKeyComparator* icmp);
|
||||||
|
|
||||||
|
bool ShouldDelete(const ParsedInternalKey& parsed);
|
||||||
|
void Invalidate();
|
||||||
|
|
||||||
|
void AddNewIter(TruncatedRangeDelIterator* iter,
|
||||||
|
const ParsedInternalKey& parsed) {
|
||||||
|
iter->Seek(parsed.user_key);
|
||||||
|
PushIter(iter, parsed);
|
||||||
|
assert(active_iters_.size() == active_seqnums_.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t UnusedIdx() const { return unused_idx_; }
|
||||||
|
void IncUnusedIdx() { unused_idx_++; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
using ActiveSeqSet =
|
||||||
|
std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;
|
||||||
|
|
||||||
|
struct EndKeyMinComparator {
|
||||||
|
explicit EndKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}
|
||||||
|
|
||||||
|
bool operator()(const ActiveSeqSet::const_iterator& a,
|
||||||
|
const ActiveSeqSet::const_iterator& b) const {
|
||||||
|
return icmp->Compare((*a)->end_key(), (*b)->end_key()) > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const InternalKeyComparator* icmp;
|
||||||
};
|
};
|
||||||
// Initializes rep_ lazily. This aggregator object is constructed for every
|
|
||||||
// read, so expensive members should only be created when necessary, i.e.,
|
|
||||||
// once the first range deletion is encountered.
|
|
||||||
void InitRep(const std::vector<SequenceNumber>& snapshots);
|
|
||||||
|
|
||||||
std::unique_ptr<RangeDelMap> NewRangeDelMap();
|
void PushIter(TruncatedRangeDelIterator* iter,
|
||||||
RangeDelMap* GetRangeDelMapIfExists(SequenceNumber seq);
|
const ParsedInternalKey& parsed) {
|
||||||
RangeDelMap& GetRangeDelMap(SequenceNumber seq);
|
if (!iter->Valid()) {
|
||||||
|
// The iterator has been fully consumed, so we don't need to add it to
|
||||||
|
// either of the heaps.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int cmp = icmp_->Compare(parsed, iter->start_key());
|
||||||
|
if (cmp < 0) {
|
||||||
|
PushInactiveIter(iter);
|
||||||
|
} else {
|
||||||
|
PushActiveIter(iter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void PushActiveIter(TruncatedRangeDelIterator* iter) {
|
||||||
|
auto seq_pos = active_seqnums_.insert(iter);
|
||||||
|
active_iters_.push(seq_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
TruncatedRangeDelIterator* PopActiveIter() {
|
||||||
|
auto active_top = active_iters_.top();
|
||||||
|
auto iter = *active_top;
|
||||||
|
active_iters_.pop();
|
||||||
|
active_seqnums_.erase(active_top);
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PushInactiveIter(TruncatedRangeDelIterator* iter) {
|
||||||
|
inactive_iters_.push(iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
TruncatedRangeDelIterator* PopInactiveIter() {
|
||||||
|
auto* iter = inactive_iters_.top();
|
||||||
|
inactive_iters_.pop();
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
const InternalKeyComparator* icmp_;
|
||||||
|
size_t unused_idx_;
|
||||||
|
ActiveSeqSet active_seqnums_;
|
||||||
|
BinaryHeap<ActiveSeqSet::const_iterator, EndKeyMinComparator> active_iters_;
|
||||||
|
BinaryHeap<TruncatedRangeDelIterator*, StartKeyMinComparator> inactive_iters_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ReverseRangeDelIterator {
|
||||||
|
public:
|
||||||
|
explicit ReverseRangeDelIterator(const InternalKeyComparator* icmp);
|
||||||
|
|
||||||
|
bool ShouldDelete(const ParsedInternalKey& parsed);
|
||||||
|
void Invalidate();
|
||||||
|
|
||||||
|
void AddNewIter(TruncatedRangeDelIterator* iter,
|
||||||
|
const ParsedInternalKey& parsed) {
|
||||||
|
iter->SeekForPrev(parsed.user_key);
|
||||||
|
PushIter(iter, parsed);
|
||||||
|
assert(active_iters_.size() == active_seqnums_.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t UnusedIdx() const { return unused_idx_; }
|
||||||
|
void IncUnusedIdx() { unused_idx_++; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
using ActiveSeqSet =
|
||||||
|
std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;
|
||||||
|
|
||||||
|
struct EndKeyMaxComparator {
|
||||||
|
explicit EndKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}
|
||||||
|
|
||||||
|
bool operator()(const TruncatedRangeDelIterator* a,
|
||||||
|
const TruncatedRangeDelIterator* b) const {
|
||||||
|
return icmp->Compare(a->end_key(), b->end_key()) < 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const InternalKeyComparator* icmp;
|
||||||
|
};
|
||||||
|
struct StartKeyMaxComparator {
|
||||||
|
explicit StartKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}
|
||||||
|
|
||||||
|
bool operator()(const ActiveSeqSet::const_iterator& a,
|
||||||
|
const ActiveSeqSet::const_iterator& b) const {
|
||||||
|
return icmp->Compare((*a)->start_key(), (*b)->start_key()) < 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const InternalKeyComparator* icmp;
|
||||||
|
};
|
||||||
|
|
||||||
|
void PushIter(TruncatedRangeDelIterator* iter,
|
||||||
|
const ParsedInternalKey& parsed) {
|
||||||
|
if (!iter->Valid()) {
|
||||||
|
// The iterator has been fully consumed, so we don't need to add it to
|
||||||
|
// either of the heaps.
|
||||||
|
} else if (icmp_->Compare(iter->end_key(), parsed) <= 0) {
|
||||||
|
PushInactiveIter(iter);
|
||||||
|
} else {
|
||||||
|
PushActiveIter(iter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void PushActiveIter(TruncatedRangeDelIterator* iter) {
|
||||||
|
auto seq_pos = active_seqnums_.insert(iter);
|
||||||
|
active_iters_.push(seq_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
TruncatedRangeDelIterator* PopActiveIter() {
|
||||||
|
auto active_top = active_iters_.top();
|
||||||
|
auto iter = *active_top;
|
||||||
|
active_iters_.pop();
|
||||||
|
active_seqnums_.erase(active_top);
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PushInactiveIter(TruncatedRangeDelIterator* iter) {
|
||||||
|
inactive_iters_.push(iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
TruncatedRangeDelIterator* PopInactiveIter() {
|
||||||
|
auto* iter = inactive_iters_.top();
|
||||||
|
inactive_iters_.pop();
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
const InternalKeyComparator* icmp_;
|
||||||
|
size_t unused_idx_;
|
||||||
|
ActiveSeqSet active_seqnums_;
|
||||||
|
BinaryHeap<ActiveSeqSet::const_iterator, StartKeyMaxComparator> active_iters_;
|
||||||
|
BinaryHeap<TruncatedRangeDelIterator*, EndKeyMaxComparator> inactive_iters_;
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class RangeDelPositioningMode { kForwardTraversal, kBackwardTraversal };
|
||||||
|
class RangeDelAggregator {
|
||||||
|
public:
|
||||||
|
explicit RangeDelAggregator(const InternalKeyComparator* icmp)
|
||||||
|
: icmp_(icmp) {}
|
||||||
|
virtual ~RangeDelAggregator() {}
|
||||||
|
|
||||||
|
virtual void AddTombstones(
|
||||||
|
std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
|
||||||
|
const InternalKey* smallest = nullptr,
|
||||||
|
const InternalKey* largest = nullptr) = 0;
|
||||||
|
|
||||||
|
bool ShouldDelete(const Slice& key, RangeDelPositioningMode mode) {
|
||||||
|
ParsedInternalKey parsed;
|
||||||
|
if (!ParseInternalKey(key, &parsed)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return ShouldDelete(parsed, mode);
|
||||||
|
}
|
||||||
|
virtual bool ShouldDelete(const ParsedInternalKey& parsed,
|
||||||
|
RangeDelPositioningMode mode) = 0;
|
||||||
|
|
||||||
|
virtual void InvalidateRangeDelMapPositions() = 0;
|
||||||
|
|
||||||
|
virtual bool IsEmpty() const = 0;
|
||||||
|
|
||||||
|
bool AddFile(uint64_t file_number) {
|
||||||
|
return files_seen_.insert(file_number).second;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
class StripeRep {
|
||||||
|
public:
|
||||||
|
StripeRep(const InternalKeyComparator* icmp, SequenceNumber upper_bound,
|
||||||
|
SequenceNumber lower_bound)
|
||||||
|
: icmp_(icmp),
|
||||||
|
forward_iter_(icmp),
|
||||||
|
reverse_iter_(icmp),
|
||||||
|
upper_bound_(upper_bound),
|
||||||
|
lower_bound_(lower_bound) {}
|
||||||
|
|
||||||
|
void AddTombstones(std::unique_ptr<TruncatedRangeDelIterator> input_iter) {
|
||||||
|
iters_.push_back(std::move(input_iter));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsEmpty() const { return iters_.empty(); }
|
||||||
|
|
||||||
|
bool ShouldDelete(const ParsedInternalKey& parsed,
|
||||||
|
RangeDelPositioningMode mode);
|
||||||
|
|
||||||
|
void Invalidate() {
|
||||||
|
InvalidateForwardIter();
|
||||||
|
InvalidateReverseIter();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsRangeOverlapped(const Slice& start, const Slice& end);
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool InStripe(SequenceNumber seq) const {
|
||||||
|
return lower_bound_ <= seq && seq <= upper_bound_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void InvalidateForwardIter() { forward_iter_.Invalidate(); }
|
||||||
|
|
||||||
|
void InvalidateReverseIter() { reverse_iter_.Invalidate(); }
|
||||||
|
|
||||||
|
const InternalKeyComparator* icmp_;
|
||||||
|
std::vector<std::unique_ptr<TruncatedRangeDelIterator>> iters_;
|
||||||
|
ForwardRangeDelIterator forward_iter_;
|
||||||
|
ReverseRangeDelIterator reverse_iter_;
|
||||||
SequenceNumber upper_bound_;
|
SequenceNumber upper_bound_;
|
||||||
std::unique_ptr<Rep> rep_;
|
SequenceNumber lower_bound_;
|
||||||
const InternalKeyComparator& icmp_;
|
};
|
||||||
// collapse range deletions so they're binary searchable
|
|
||||||
const bool collapse_deletions_;
|
const InternalKeyComparator* icmp_;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::set<uint64_t> files_seen_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ReadRangeDelAggregator : public RangeDelAggregator {
|
||||||
|
public:
|
||||||
|
ReadRangeDelAggregator(const InternalKeyComparator* icmp,
|
||||||
|
SequenceNumber upper_bound)
|
||||||
|
: RangeDelAggregator(icmp),
|
||||||
|
rep_(icmp, upper_bound, 0 /* lower_bound */) {}
|
||||||
|
~ReadRangeDelAggregator() override {}
|
||||||
|
|
||||||
|
using RangeDelAggregator::ShouldDelete;
|
||||||
|
void AddTombstones(
|
||||||
|
std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
|
||||||
|
const InternalKey* smallest = nullptr,
|
||||||
|
const InternalKey* largest = nullptr) override;
|
||||||
|
|
||||||
|
bool ShouldDelete(const ParsedInternalKey& parsed,
|
||||||
|
RangeDelPositioningMode mode) override;
|
||||||
|
|
||||||
|
bool IsRangeOverlapped(const Slice& start, const Slice& end);
|
||||||
|
|
||||||
|
void InvalidateRangeDelMapPositions() override { rep_.Invalidate(); }
|
||||||
|
|
||||||
|
bool IsEmpty() const override { return rep_.IsEmpty(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
StripeRep rep_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CompactionRangeDelAggregator : public RangeDelAggregator {
|
||||||
|
public:
|
||||||
|
CompactionRangeDelAggregator(const InternalKeyComparator* icmp,
|
||||||
|
const std::vector<SequenceNumber>& snapshots)
|
||||||
|
: RangeDelAggregator(icmp), snapshots_(&snapshots) {}
|
||||||
|
~CompactionRangeDelAggregator() override {}
|
||||||
|
|
||||||
|
void AddTombstones(
|
||||||
|
std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
|
||||||
|
const InternalKey* smallest = nullptr,
|
||||||
|
const InternalKey* largest = nullptr) override;
|
||||||
|
|
||||||
|
using RangeDelAggregator::ShouldDelete;
|
||||||
|
bool ShouldDelete(const ParsedInternalKey& parsed,
|
||||||
|
RangeDelPositioningMode mode) override;
|
||||||
|
|
||||||
|
bool IsRangeOverlapped(const Slice& start, const Slice& end);
|
||||||
|
|
||||||
|
void InvalidateRangeDelMapPositions() override {
|
||||||
|
for (auto& rep : reps_) {
|
||||||
|
rep.second.Invalidate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsEmpty() const override {
|
||||||
|
for (const auto& rep : reps_) {
|
||||||
|
if (!rep.second.IsEmpty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates an iterator over all the range tombstones in the aggregator, for
|
||||||
|
// use in compaction. Nullptr arguments indicate that the iterator range is
|
||||||
|
// unbounded.
|
||||||
|
// NOTE: the boundaries are used for optimization purposes to reduce the
|
||||||
|
// number of tombstones that are passed to the fragmenter; they do not
|
||||||
|
// guarantee that the resulting iterator only contains range tombstones that
|
||||||
|
// cover keys in the provided range. If required, these bounds must be
|
||||||
|
// enforced during iteration.
|
||||||
|
std::unique_ptr<FragmentedRangeTombstoneIterator> NewIterator(
|
||||||
|
const Slice* lower_bound = nullptr, const Slice* upper_bound = nullptr,
|
||||||
|
bool upper_bound_inclusive = false);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<std::unique_ptr<TruncatedRangeDelIterator>> parent_iters_;
|
||||||
|
std::map<SequenceNumber, StripeRep> reps_;
|
||||||
|
|
||||||
|
const std::vector<SequenceNumber>* snapshots_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -20,7 +20,6 @@ int main() {
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "db/range_del_aggregator.h"
|
#include "db/range_del_aggregator.h"
|
||||||
#include "db/range_del_aggregator_v2.h"
|
|
||||||
#include "db/range_tombstone_fragmenter.h"
|
#include "db/range_tombstone_fragmenter.h"
|
||||||
#include "rocksdb/comparator.h"
|
#include "rocksdb/comparator.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
@ -48,8 +47,6 @@ DEFINE_double(tombstone_width_mean, 100.0, "average range tombstone width");
|
|||||||
DEFINE_double(tombstone_width_stddev, 0.0,
|
DEFINE_double(tombstone_width_stddev, 0.0,
|
||||||
"standard deviation of range tombstone width");
|
"standard deviation of range tombstone width");
|
||||||
|
|
||||||
DEFINE_bool(use_collapsed, true, "use the collapsed range tombstone map");
|
|
||||||
|
|
||||||
DEFINE_int32(seed, 0, "random number generator seed");
|
DEFINE_int32(seed, 0, "random number generator seed");
|
||||||
|
|
||||||
DEFINE_int32(should_deletes_per_run, 1, "number of ShouldDelete calls per run");
|
DEFINE_int32(should_deletes_per_run, 1, "number of ShouldDelete calls per run");
|
||||||
@ -57,8 +54,6 @@ DEFINE_int32(should_deletes_per_run, 1, "number of ShouldDelete calls per run");
|
|||||||
DEFINE_int32(add_tombstones_per_run, 1,
|
DEFINE_int32(add_tombstones_per_run, 1,
|
||||||
"number of AddTombstones calls per run");
|
"number of AddTombstones calls per run");
|
||||||
|
|
||||||
DEFINE_bool(use_v2_aggregator, false, "benchmark RangeDelAggregatorV2");
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct Stats {
|
struct Stats {
|
||||||
@ -187,14 +182,10 @@ int main(int argc, char** argv) {
|
|||||||
std::vector<rocksdb::PersistentRangeTombstone>(
|
std::vector<rocksdb::PersistentRangeTombstone>(
|
||||||
FLAGS_num_range_tombstones);
|
FLAGS_num_range_tombstones);
|
||||||
}
|
}
|
||||||
auto mode = FLAGS_use_collapsed
|
auto mode = rocksdb::RangeDelPositioningMode::kForwardTraversal;
|
||||||
? rocksdb::RangeDelPositioningMode::kForwardTraversal
|
|
||||||
: rocksdb::RangeDelPositioningMode::kFullScan;
|
|
||||||
|
|
||||||
for (int i = 0; i < FLAGS_num_runs; i++) {
|
for (int i = 0; i < FLAGS_num_runs; i++) {
|
||||||
rocksdb::RangeDelAggregator range_del_agg(icmp, {} /* snapshots */,
|
rocksdb::ReadRangeDelAggregator range_del_agg(
|
||||||
FLAGS_use_collapsed);
|
|
||||||
rocksdb::RangeDelAggregatorV2 range_del_agg_v2(
|
|
||||||
&icmp, rocksdb::kMaxSequenceNumber /* upper_bound */);
|
&icmp, rocksdb::kMaxSequenceNumber /* upper_bound */);
|
||||||
|
|
||||||
std::vector<std::unique_ptr<rocksdb::FragmentedRangeTombstoneList> >
|
std::vector<std::unique_ptr<rocksdb::FragmentedRangeTombstoneList> >
|
||||||
@ -206,7 +197,7 @@ int main(int argc, char** argv) {
|
|||||||
// real workloads.
|
// real workloads.
|
||||||
for (int j = 0; j < FLAGS_num_range_tombstones; j++) {
|
for (int j = 0; j < FLAGS_num_range_tombstones; j++) {
|
||||||
uint64_t start = rnd.Uniform(FLAGS_tombstone_start_upper_bound);
|
uint64_t start = rnd.Uniform(FLAGS_tombstone_start_upper_bound);
|
||||||
uint64_t end = start + std::max(1.0, normal_dist(random_gen));
|
uint64_t end = start + static_cast<uint64_t>(std::max(1.0, normal_dist(random_gen)));
|
||||||
persistent_range_tombstones[j] = rocksdb::PersistentRangeTombstone(
|
persistent_range_tombstones[j] = rocksdb::PersistentRangeTombstone(
|
||||||
rocksdb::Key(start), rocksdb::Key(end), j);
|
rocksdb::Key(start), rocksdb::Key(end), j);
|
||||||
}
|
}
|
||||||
@ -220,20 +211,13 @@ int main(int argc, char** argv) {
|
|||||||
std::unique_ptr<rocksdb::FragmentedRangeTombstoneIterator>
|
std::unique_ptr<rocksdb::FragmentedRangeTombstoneIterator>
|
||||||
fragmented_range_del_iter(
|
fragmented_range_del_iter(
|
||||||
new rocksdb::FragmentedRangeTombstoneIterator(
|
new rocksdb::FragmentedRangeTombstoneIterator(
|
||||||
fragmented_range_tombstone_lists.back().get(),
|
fragmented_range_tombstone_lists.back().get(), icmp,
|
||||||
rocksdb::kMaxSequenceNumber, icmp));
|
rocksdb::kMaxSequenceNumber));
|
||||||
|
|
||||||
if (FLAGS_use_v2_aggregator) {
|
rocksdb::StopWatchNano stop_watch_add_tombstones(rocksdb::Env::Default(),
|
||||||
rocksdb::StopWatchNano stop_watch_add_tombstones(
|
true /* auto_start */);
|
||||||
rocksdb::Env::Default(), true /* auto_start */);
|
range_del_agg.AddTombstones(std::move(fragmented_range_del_iter));
|
||||||
range_del_agg_v2.AddTombstones(std::move(fragmented_range_del_iter));
|
|
||||||
stats.time_add_tombstones += stop_watch_add_tombstones.ElapsedNanos();
|
stats.time_add_tombstones += stop_watch_add_tombstones.ElapsedNanos();
|
||||||
} else {
|
|
||||||
rocksdb::StopWatchNano stop_watch_add_tombstones(
|
|
||||||
rocksdb::Env::Default(), true /* auto_start */);
|
|
||||||
range_del_agg.AddTombstones(std::move(range_del_iter));
|
|
||||||
stats.time_add_tombstones += stop_watch_add_tombstones.ElapsedNanos();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rocksdb::ParsedInternalKey parsed_key;
|
rocksdb::ParsedInternalKey parsed_key;
|
||||||
@ -247,18 +231,10 @@ int main(int argc, char** argv) {
|
|||||||
std::string key_string = rocksdb::Key(first_key + j);
|
std::string key_string = rocksdb::Key(first_key + j);
|
||||||
parsed_key.user_key = key_string;
|
parsed_key.user_key = key_string;
|
||||||
|
|
||||||
uint64_t call_time;
|
|
||||||
if (FLAGS_use_v2_aggregator) {
|
|
||||||
rocksdb::StopWatchNano stop_watch_should_delete(rocksdb::Env::Default(),
|
|
||||||
true /* auto_start */);
|
|
||||||
range_del_agg_v2.ShouldDelete(parsed_key, mode);
|
|
||||||
call_time = stop_watch_should_delete.ElapsedNanos();
|
|
||||||
} else {
|
|
||||||
rocksdb::StopWatchNano stop_watch_should_delete(rocksdb::Env::Default(),
|
rocksdb::StopWatchNano stop_watch_should_delete(rocksdb::Env::Default(),
|
||||||
true /* auto_start */);
|
true /* auto_start */);
|
||||||
range_del_agg.ShouldDelete(parsed_key, mode);
|
range_del_agg.ShouldDelete(parsed_key, mode);
|
||||||
call_time = stop_watch_should_delete.ElapsedNanos();
|
uint64_t call_time = stop_watch_should_delete.ElapsedNanos();
|
||||||
}
|
|
||||||
|
|
||||||
if (j == 0) {
|
if (j == 0) {
|
||||||
stats.time_first_should_delete += call_time;
|
stats.time_first_should_delete += call_time;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,295 +0,0 @@
|
|||||||
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under both the GPLv2 (found in the
|
|
||||||
// COPYING file in the root directory) and Apache 2.0 License
|
|
||||||
// (found in the LICENSE.Apache file in the root directory).
|
|
||||||
|
|
||||||
#include "db/range_del_aggregator_v2.h"
|
|
||||||
|
|
||||||
#include "db/compaction_iteration_stats.h"
|
|
||||||
#include "db/dbformat.h"
|
|
||||||
#include "db/pinned_iterators_manager.h"
|
|
||||||
#include "db/range_del_aggregator.h"
|
|
||||||
#include "db/range_tombstone_fragmenter.h"
|
|
||||||
#include "db/version_edit.h"
|
|
||||||
#include "include/rocksdb/comparator.h"
|
|
||||||
#include "include/rocksdb/types.h"
|
|
||||||
#include "table/internal_iterator.h"
|
|
||||||
#include "table/scoped_arena_iterator.h"
|
|
||||||
#include "table/table_builder.h"
|
|
||||||
#include "util/heap.h"
|
|
||||||
#include "util/kv_map.h"
|
|
||||||
#include "util/vector_iterator.h"
|
|
||||||
|
|
||||||
namespace rocksdb {
|
|
||||||
|
|
||||||
TruncatedRangeDelIterator::TruncatedRangeDelIterator(
|
|
||||||
std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
|
|
||||||
const InternalKeyComparator* icmp, const InternalKey* smallest,
|
|
||||||
const InternalKey* largest)
|
|
||||||
: iter_(std::move(iter)), icmp_(icmp) {
|
|
||||||
if (smallest != nullptr) {
|
|
||||||
pinned_bounds_.emplace_back();
|
|
||||||
auto& parsed_smallest = pinned_bounds_.back();
|
|
||||||
if (!ParseInternalKey(smallest->Encode(), &parsed_smallest)) {
|
|
||||||
assert(false);
|
|
||||||
}
|
|
||||||
smallest_ = &parsed_smallest;
|
|
||||||
}
|
|
||||||
if (largest != nullptr) {
|
|
||||||
pinned_bounds_.emplace_back();
|
|
||||||
auto& parsed_largest = pinned_bounds_.back();
|
|
||||||
if (!ParseInternalKey(largest->Encode(), &parsed_largest)) {
|
|
||||||
assert(false);
|
|
||||||
}
|
|
||||||
if (parsed_largest.type == kTypeRangeDeletion &&
|
|
||||||
parsed_largest.sequence == kMaxSequenceNumber) {
|
|
||||||
// The file boundary has been artificially extended by a range tombstone.
|
|
||||||
// We do not need to adjust largest to properly truncate range
|
|
||||||
// tombstones that extend past the boundary.
|
|
||||||
} else if (parsed_largest.sequence == 0) {
|
|
||||||
// The largest key in the sstable has a sequence number of 0. Since we
|
|
||||||
// guarantee that no internal keys with the same user key and sequence
|
|
||||||
// number can exist in a DB, we know that the largest key in this sstable
|
|
||||||
// cannot exist as the smallest key in the next sstable. This further
|
|
||||||
// implies that no range tombstone in this sstable covers largest;
|
|
||||||
// otherwise, the file boundary would have been artificially extended.
|
|
||||||
//
|
|
||||||
// Therefore, we will never truncate a range tombstone at largest, so we
|
|
||||||
// can leave it unchanged.
|
|
||||||
} else {
|
|
||||||
// The same user key may straddle two sstable boundaries. To ensure that
|
|
||||||
// the truncated end key can cover the largest key in this sstable, reduce
|
|
||||||
// its sequence number by 1.
|
|
||||||
parsed_largest.sequence -= 1;
|
|
||||||
}
|
|
||||||
largest_ = &parsed_largest;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool TruncatedRangeDelIterator::Valid() const {
|
|
||||||
return iter_->Valid() &&
|
|
||||||
(smallest_ == nullptr ||
|
|
||||||
icmp_->Compare(*smallest_, iter_->parsed_end_key()) < 0) &&
|
|
||||||
(largest_ == nullptr ||
|
|
||||||
icmp_->Compare(iter_->parsed_start_key(), *largest_) < 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Advances to the next fragment in the top (most visible) tombstone layer.
void TruncatedRangeDelIterator::Next() { iter_->TopNext(); }
|
|
||||||
|
|
||||||
// Moves to the previous fragment in the top (most visible) tombstone layer.
void TruncatedRangeDelIterator::Prev() { iter_->TopPrev(); }
|
|
||||||
|
|
||||||
// NOTE: target is a user key
void TruncatedRangeDelIterator::Seek(const Slice& target) {
  // If the file's upper bound is at or before target (compared as an internal
  // key with kMaxSequenceNumber, i.e. before any real entry for target), no
  // truncated tombstone can cover target, so invalidate instead of seeking.
  if (largest_ != nullptr &&
      icmp_->Compare(*largest_, ParsedInternalKey(target, kMaxSequenceNumber,
                                                  kTypeRangeDeletion)) <= 0) {
    iter_->Invalidate();
    return;
  }
  iter_->Seek(target);
}
|
|
||||||
|
|
||||||
// NOTE: target is a user key
void TruncatedRangeDelIterator::SeekForPrev(const Slice& target) {
  // If the file's lower bound is after target (compared as an internal key
  // with sequence number 0, i.e. after any real entry for target), no
  // truncated tombstone can cover target, so invalidate instead of seeking.
  if (smallest_ != nullptr &&
      icmp_->Compare(ParsedInternalKey(target, 0, kTypeRangeDeletion),
                     *smallest_) < 0) {
    iter_->Invalidate();
    return;
  }
  iter_->SeekForPrev(target);
}
|
|
||||||
|
|
||||||
// Positions at the first fragment of the top (most visible) tombstone layer.
void TruncatedRangeDelIterator::SeekToFirst() { iter_->SeekToTopFirst(); }
|
|
||||||
|
|
||||||
// Positions at the last fragment of the top (most visible) tombstone layer.
void TruncatedRangeDelIterator::SeekToLast() { iter_->SeekToTopLast(); }
|
|
||||||
|
|
||||||
// Constructs a forward-traversal aggregator view over `iters` (owned by the
// enclosing RangeDelAggregatorV2). Active iterators are ordered by minimum
// end key; inactive (not-yet-reached) iterators by minimum start key.
ForwardRangeDelIterator::ForwardRangeDelIterator(
    const InternalKeyComparator* icmp,
    const std::vector<std::unique_ptr<TruncatedRangeDelIterator>>* iters)
    : icmp_(icmp),
      iters_(iters),
      unused_idx_(0),  // index of the first iterator in *iters not yet seen
      active_seqnums_(SeqMaxComparator()),
      active_iters_(EndKeyMinComparator(icmp)),
      inactive_iters_(StartKeyMinComparator(icmp)) {}
|
|
||||||
|
|
||||||
// Returns true iff `parsed` is covered by some tombstone whose sequence
// number is larger than parsed.sequence. Callers must present keys in
// non-decreasing order; the active/inactive heaps advance monotonically.
bool ForwardRangeDelIterator::ShouldDelete(const ParsedInternalKey& parsed) {
  assert(iters_ != nullptr);
  // Pick up previously unseen iterators.
  for (auto it = std::next(iters_->begin(), unused_idx_); it != iters_->end();
       ++it, ++unused_idx_) {
    auto& iter = *it;
    iter->Seek(parsed.user_key);
    // PushIter classifies the iterator as active (covers parsed), inactive
    // (starts after parsed), or exhausted (dropped).
    PushIter(iter.get(), parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // Move active iterators that end before parsed.
  while (!active_iters_.empty() &&
         icmp_->Compare((*active_iters_.top())->end_key(), parsed) <= 0) {
    TruncatedRangeDelIterator* iter = PopActiveIter();
    do {
      iter->Next();
    } while (iter->Valid() && icmp_->Compare(iter->end_key(), parsed) <= 0);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // Move inactive iterators that start before parsed.
  while (!inactive_iters_.empty() &&
         icmp_->Compare(inactive_iters_.top()->start_key(), parsed) <= 0) {
    TruncatedRangeDelIterator* iter = PopInactiveIter();
    while (iter->Valid() && icmp_->Compare(iter->end_key(), parsed) <= 0) {
      iter->Next();
    }
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // The multiset is ordered by descending seqnum, so begin() is the covering
  // tombstone with the largest sequence number.
  return active_seqnums_.empty()
             ? false
             : (*active_seqnums_.begin())->seq() > parsed.sequence;
}
|
|
||||||
|
|
||||||
// Discards all positioning state. The next ShouldDelete() call will re-seek
// every source iterator from scratch (unused_idx_ back at 0).
void ForwardRangeDelIterator::Invalidate() {
  active_iters_.clear();
  active_seqnums_.clear();
  inactive_iters_.clear();
  unused_idx_ = 0;
}
|
|
||||||
|
|
||||||
// Constructs a backward-traversal aggregator view over `iters` — the mirror
// image of ForwardRangeDelIterator: active iterators are ordered by maximum
// start key; inactive (not-yet-reached) iterators by maximum end key.
ReverseRangeDelIterator::ReverseRangeDelIterator(
    const InternalKeyComparator* icmp,
    const std::vector<std::unique_ptr<TruncatedRangeDelIterator>>* iters)
    : icmp_(icmp),
      iters_(iters),
      unused_idx_(0),  // index of the first iterator in *iters not yet seen
      active_seqnums_(SeqMaxComparator()),
      active_iters_(StartKeyMaxComparator(icmp)),
      inactive_iters_(EndKeyMaxComparator(icmp)) {}
|
|
||||||
|
|
||||||
// Returns true iff `parsed` is covered by some tombstone whose sequence
// number is larger than parsed.sequence. Callers must present keys in
// non-increasing order; heaps regress monotonically (mirror of the forward
// implementation).
bool ReverseRangeDelIterator::ShouldDelete(const ParsedInternalKey& parsed) {
  assert(iters_ != nullptr);
  // Pick up previously unseen iterators.
  for (auto it = std::next(iters_->begin(), unused_idx_); it != iters_->end();
       ++it, ++unused_idx_) {
    auto& iter = *it;
    iter->SeekForPrev(parsed.user_key);
    // PushIter classifies the iterator as active (covers parsed), inactive
    // (ends before parsed), or exhausted (dropped).
    PushIter(iter.get(), parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // Move active iterators that start after parsed.
  while (!active_iters_.empty() &&
         icmp_->Compare(parsed, (*active_iters_.top())->start_key()) < 0) {
    TruncatedRangeDelIterator* iter = PopActiveIter();
    do {
      iter->Prev();
    } while (iter->Valid() && icmp_->Compare(parsed, iter->start_key()) < 0);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // Move inactive iterators that end after parsed.
  while (!inactive_iters_.empty() &&
         icmp_->Compare(parsed, inactive_iters_.top()->end_key()) < 0) {
    TruncatedRangeDelIterator* iter = PopInactiveIter();
    while (iter->Valid() && icmp_->Compare(parsed, iter->start_key()) < 0) {
      iter->Prev();
    }
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // The multiset is ordered by descending seqnum, so begin() is the covering
  // tombstone with the largest sequence number.
  return active_seqnums_.empty()
             ? false
             : (*active_seqnums_.begin())->seq() > parsed.sequence;
}
|
|
||||||
|
|
||||||
// Discards all positioning state. The next ShouldDelete() call will re-seek
// every source iterator from scratch (unused_idx_ back at 0).
void ReverseRangeDelIterator::Invalidate() {
  active_iters_.clear();
  active_seqnums_.clear();
  inactive_iters_.clear();
  unused_idx_ = 0;
}
|
|
||||||
|
|
||||||
// Both directional views share the aggregator's iters_ vector; they pick up
// newly added iterators lazily via their unused_idx_ counters. upper_bound is
// currently unused by this implementation.
RangeDelAggregatorV2::RangeDelAggregatorV2(const InternalKeyComparator* icmp,
                                           SequenceNumber /* upper_bound */)
    : icmp_(icmp), forward_iter_(icmp, &iters_), reverse_iter_(icmp, &iters_) {}
|
|
||||||
|
|
||||||
// Takes ownership of a fragmented tombstone iterator, truncating its
// tombstones to the file bounds [smallest, largest] when provided. No-ops on
// a null or empty input. If this aggregator has been delegated to an
// old-style RangeDelAggregator, forwards to it instead.
void RangeDelAggregatorV2::AddTombstones(
    std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
    const InternalKey* smallest, const InternalKey* largest) {
  if (input_iter == nullptr || input_iter->empty()) {
    return;
  }
  if (wrapped_range_del_agg != nullptr) {
    wrapped_range_del_agg->AddTombstones(std::move(input_iter), smallest,
                                         largest);
    // TODO: this eats the status of the wrapped call; may want to propagate it
    return;
  }
  iters_.emplace_back(new TruncatedRangeDelIterator(std::move(input_iter),
                                                    icmp_, smallest, largest));
}
|
|
||||||
|
|
||||||
// Returns true iff `parsed` is covered by a newer range tombstone. `mode`
// selects the directional view; switching direction invalidates the opposite
// view since both assume monotonic key order.
bool RangeDelAggregatorV2::ShouldDelete(const ParsedInternalKey& parsed,
                                        RangeDelPositioningMode mode) {
  if (wrapped_range_del_agg != nullptr) {
    // Delegated mode: pass straight through to the old-style aggregator.
    return wrapped_range_del_agg->ShouldDelete(parsed, mode);
  }

  switch (mode) {
    case RangeDelPositioningMode::kForwardTraversal:
      reverse_iter_.Invalidate();
      return forward_iter_.ShouldDelete(parsed);
    case RangeDelPositioningMode::kBackwardTraversal:
      forward_iter_.Invalidate();
      return reverse_iter_.ShouldDelete(parsed);
    default:
      // Other positioning modes are not supported by V2.
      assert(false);
      return false;
  }
}
|
|
||||||
|
|
||||||
// Returns true iff any tombstone overlaps the user-key range [start, end]
// (inclusive). Scans every source iterator; not valid in delegated mode.
bool RangeDelAggregatorV2::IsRangeOverlapped(const Slice& start,
                                             const Slice& end) {
  assert(wrapped_range_del_agg == nullptr);
  // Reset both directional views since we reposition the shared iterators.
  InvalidateRangeDelMapPositions();

  // Set the internal start/end keys so that:
  // - if start_ikey has the same user key and sequence number as the current
  //   end key, start_ikey will be considered greater; and
  // - if end_ikey has the same user key and sequence number as the current
  //   start key, end_ikey will be considered greater.
  ParsedInternalKey start_ikey(start, kMaxSequenceNumber,
                               static_cast<ValueType>(0));
  ParsedInternalKey end_ikey(end, 0, static_cast<ValueType>(0));
  for (auto& iter : iters_) {
    bool checked_candidate_tombstones = false;
    // Scan tombstones from the one covering (or preceding) start until they
    // begin past end.
    for (iter->SeekForPrev(start);
         iter->Valid() && icmp_->Compare(iter->start_key(), end_ikey) <= 0;
         iter->Next()) {
      checked_candidate_tombstones = true;
      if (icmp_->Compare(start_ikey, iter->end_key()) < 0 &&
          icmp_->Compare(iter->start_key(), end_ikey) <= 0) {
        return true;
      }
    }

    if (!checked_candidate_tombstones) {
      // Do an additional check for when the end of the range is the begin key
      // of a tombstone, which we missed earlier since SeekForPrev'ing to the
      // start was invalid.
      iter->SeekForPrev(end);
      if (iter->Valid() && icmp_->Compare(start_ikey, iter->end_key()) < 0 &&
          icmp_->Compare(iter->start_key(), end_ikey) <= 0) {
        return true;
      }
    }
  }
  return false;
}
|
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
@ -1,295 +0,0 @@
|
|||||||
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under both the GPLv2 (found in the
|
|
||||||
// COPYING file in the root directory) and Apache 2.0 License
|
|
||||||
// (found in the LICENSE.Apache file in the root directory).
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <list>
|
|
||||||
#include <map>
|
|
||||||
#include <set>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "db/compaction_iteration_stats.h"
|
|
||||||
#include "db/dbformat.h"
|
|
||||||
#include "db/pinned_iterators_manager.h"
|
|
||||||
#include "db/range_del_aggregator.h"
|
|
||||||
#include "db/range_tombstone_fragmenter.h"
|
|
||||||
#include "db/version_edit.h"
|
|
||||||
#include "include/rocksdb/comparator.h"
|
|
||||||
#include "include/rocksdb/types.h"
|
|
||||||
#include "table/internal_iterator.h"
|
|
||||||
#include "table/scoped_arena_iterator.h"
|
|
||||||
#include "table/table_builder.h"
|
|
||||||
#include "util/heap.h"
|
|
||||||
#include "util/kv_map.h"
|
|
||||||
|
|
||||||
namespace rocksdb {
|
|
||||||
|
|
||||||
class RangeDelAggregatorV2;
|
|
||||||
|
|
||||||
// Wraps a FragmentedRangeTombstoneIterator and clamps its tombstones to a
// file's key bounds [smallest, largest], so tombstones that straddle an
// sstable boundary are not applied outside the file that owns them.
class TruncatedRangeDelIterator {
 public:
  TruncatedRangeDelIterator(
      std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
      const InternalKeyComparator* icmp, const InternalKey* smallest,
      const InternalKey* largest);

  bool Valid() const;

  void Next();
  void Prev();

  // Seeks to the tombstone with the highest visible sequence number that
  // covers target (a user key). If no such tombstone exists, the position
  // will be at the earliest tombstone that ends after target.
  void Seek(const Slice& target);

  // Seeks to the tombstone with the highest visible sequence number that
  // covers target (a user key). If no such tombstone exists, the position
  // will be at the latest tombstone that starts before target.
  void SeekForPrev(const Slice& target);

  void SeekToFirst();
  void SeekToLast();

  // Start of the current tombstone, clamped to the file's lower bound.
  ParsedInternalKey start_key() const {
    return (smallest_ == nullptr ||
            icmp_->Compare(*smallest_, iter_->parsed_start_key()) <= 0)
               ? iter_->parsed_start_key()
               : *smallest_;
  }

  // End of the current tombstone, clamped to the file's upper bound.
  ParsedInternalKey end_key() const {
    return (largest_ == nullptr ||
            icmp_->Compare(iter_->parsed_end_key(), *largest_) <= 0)
               ? iter_->parsed_end_key()
               : *largest_;
  }

  SequenceNumber seq() const { return iter_->seq(); }

 private:
  std::unique_ptr<FragmentedRangeTombstoneIterator> iter_;
  const InternalKeyComparator* icmp_;
  // Parsed truncation bounds; null means the corresponding side is unbounded.
  const ParsedInternalKey* smallest_ = nullptr;
  const ParsedInternalKey* largest_ = nullptr;
  // Owns the parsed bound keys that smallest_/largest_ point into; std::list
  // so existing pointers stay valid as entries are added.
  std::list<ParsedInternalKey> pinned_bounds_;
};
|
|
||||||
|
|
||||||
// Orders truncated iterators by descending sequence number, so the first
// element of a multiset using this comparator is the newest tombstone.
struct SeqMaxComparator {
  bool operator()(const TruncatedRangeDelIterator* a,
                  const TruncatedRangeDelIterator* b) const {
    return a->seq() > b->seq();
  }
};
|
|
||||||
|
|
||||||
// Answers ShouldDelete queries for keys presented in non-decreasing order.
// Iterators are kept in two heaps: "active" ones whose current tombstone
// covers the query key (also indexed by seqnum in active_seqnums_), and
// "inactive" ones whose current tombstone starts after the query key.
class ForwardRangeDelIterator {
 public:
  ForwardRangeDelIterator(
      const InternalKeyComparator* icmp,
      const std::vector<std::unique_ptr<TruncatedRangeDelIterator>>* iters);

  bool ShouldDelete(const ParsedInternalKey& parsed);
  void Invalidate();

 private:
  using ActiveSeqSet =
      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;

  // Min-heap ordering on tombstone start key (BinaryHeap treats the
  // "greater" comparator as a min-heap).
  struct StartKeyMinComparator {
    explicit StartKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const TruncatedRangeDelIterator* a,
                    const TruncatedRangeDelIterator* b) const {
      return icmp->Compare(a->start_key(), b->start_key()) > 0;
    }

    const InternalKeyComparator* icmp;
  };
  // Min-heap ordering on tombstone end key, over ActiveSeqSet positions so
  // the matching multiset entry can be erased when an iterator is popped.
  struct EndKeyMinComparator {
    explicit EndKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const ActiveSeqSet::const_iterator& a,
                    const ActiveSeqSet::const_iterator& b) const {
      return icmp->Compare((*a)->end_key(), (*b)->end_key()) > 0;
    }

    const InternalKeyComparator* icmp;
  };

  // Routes an iterator into the proper heap relative to the query key.
  void PushIter(TruncatedRangeDelIterator* iter,
                const ParsedInternalKey& parsed) {
    if (!iter->Valid()) {
      // The iterator has been fully consumed, so we don't need to add it to
      // either of the heaps.
    } else if (icmp_->Compare(parsed, iter->start_key()) < 0) {
      PushInactiveIter(iter);
    } else {
      PushActiveIter(iter);
    }
  }

  void PushActiveIter(TruncatedRangeDelIterator* iter) {
    auto seq_pos = active_seqnums_.insert(iter);
    active_iters_.push(seq_pos);
  }

  // Removes the active iterator with the smallest end key from both the heap
  // and the seqnum index.
  TruncatedRangeDelIterator* PopActiveIter() {
    auto active_top = active_iters_.top();
    auto iter = *active_top;
    active_iters_.pop();
    active_seqnums_.erase(active_top);
    return iter;
  }

  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
    inactive_iters_.push(iter);
  }

  TruncatedRangeDelIterator* PopInactiveIter() {
    auto* iter = inactive_iters_.top();
    inactive_iters_.pop();
    return iter;
  }

  const InternalKeyComparator* icmp_;
  const std::vector<std::unique_ptr<TruncatedRangeDelIterator>>* iters_;
  // Index of the first iterator in *iters_ not yet pulled into the heaps.
  size_t unused_idx_;
  ActiveSeqSet active_seqnums_;
  BinaryHeap<ActiveSeqSet::const_iterator, EndKeyMinComparator> active_iters_;
  BinaryHeap<TruncatedRangeDelIterator*, StartKeyMinComparator> inactive_iters_;
};
|
|
||||||
|
|
||||||
// Mirror image of ForwardRangeDelIterator: answers ShouldDelete queries for
// keys presented in non-increasing order. Active iterators are max-heaped on
// start key; inactive ones (already passed by the query key) on end key.
class ReverseRangeDelIterator {
 public:
  ReverseRangeDelIterator(
      const InternalKeyComparator* icmp,
      const std::vector<std::unique_ptr<TruncatedRangeDelIterator>>* iters);

  bool ShouldDelete(const ParsedInternalKey& parsed);
  void Invalidate();

 private:
  using ActiveSeqSet =
      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;

  // Max-heap ordering on tombstone end key.
  struct EndKeyMaxComparator {
    explicit EndKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const TruncatedRangeDelIterator* a,
                    const TruncatedRangeDelIterator* b) const {
      return icmp->Compare(a->end_key(), b->end_key()) < 0;
    }

    const InternalKeyComparator* icmp;
  };
  // Max-heap ordering on tombstone start key, over ActiveSeqSet positions so
  // the matching multiset entry can be erased when an iterator is popped.
  struct StartKeyMaxComparator {
    explicit StartKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}

    bool operator()(const ActiveSeqSet::const_iterator& a,
                    const ActiveSeqSet::const_iterator& b) const {
      return icmp->Compare((*a)->start_key(), (*b)->start_key()) < 0;
    }

    const InternalKeyComparator* icmp;
  };

  // Routes an iterator into the proper heap relative to the query key.
  void PushIter(TruncatedRangeDelIterator* iter,
                const ParsedInternalKey& parsed) {
    if (!iter->Valid()) {
      // The iterator has been fully consumed, so we don't need to add it to
      // either of the heaps.
    } else if (icmp_->Compare(iter->end_key(), parsed) <= 0) {
      PushInactiveIter(iter);
    } else {
      PushActiveIter(iter);
    }
  }

  void PushActiveIter(TruncatedRangeDelIterator* iter) {
    auto seq_pos = active_seqnums_.insert(iter);
    active_iters_.push(seq_pos);
  }

  // Removes the active iterator with the largest start key from both the
  // heap and the seqnum index.
  TruncatedRangeDelIterator* PopActiveIter() {
    auto active_top = active_iters_.top();
    auto iter = *active_top;
    active_iters_.pop();
    active_seqnums_.erase(active_top);
    return iter;
  }

  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
    inactive_iters_.push(iter);
  }

  TruncatedRangeDelIterator* PopInactiveIter() {
    auto* iter = inactive_iters_.top();
    inactive_iters_.pop();
    return iter;
  }

  const InternalKeyComparator* icmp_;
  const std::vector<std::unique_ptr<TruncatedRangeDelIterator>>* iters_;
  // Index of the first iterator in *iters_ not yet pulled into the heaps.
  size_t unused_idx_;
  ActiveSeqSet active_seqnums_;
  BinaryHeap<ActiveSeqSet::const_iterator, StartKeyMaxComparator> active_iters_;
  BinaryHeap<TruncatedRangeDelIterator*, EndKeyMaxComparator> inactive_iters_;
};
|
|
||||||
|
|
||||||
// Aggregates fragmented range tombstones from multiple sources (memtables,
// sstables) and answers point-coverage (ShouldDelete) and range-overlap
// queries against the union of those tombstones.
class RangeDelAggregatorV2 {
 public:
  RangeDelAggregatorV2(const InternalKeyComparator* icmp,
                       SequenceNumber upper_bound);

  // Takes ownership of input_iter; smallest/largest, when non-null, truncate
  // its tombstones to the owning file's bounds.
  void AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
      const InternalKey* smallest = nullptr,
      const InternalKey* largest = nullptr);

  bool ShouldDelete(const ParsedInternalKey& parsed,
                    RangeDelPositioningMode mode);

  bool IsRangeOverlapped(const Slice& start, const Slice& end);

  // Resets both directional views; call before changing scan position
  // non-monotonically.
  void InvalidateRangeDelMapPositions() {
    forward_iter_.Invalidate();
    reverse_iter_.Invalidate();
  }

  bool IsEmpty() const { return iters_.empty(); }
  // Returns true iff file_number has not been added before (dedups tombstone
  // sources per file).
  bool AddFile(uint64_t file_number) {
    return files_seen_.insert(file_number).second;
  }

  // Adaptor method to pass calls through to an old-style RangeDelAggregator.
  // Will be removed once this new version supports an iterator that can be used
  // during flush/compaction.
  RangeDelAggregator* DelegateToRangeDelAggregator(
      const std::vector<SequenceNumber>& snapshots) {
    wrapped_range_del_agg.reset(new RangeDelAggregator(
        *icmp_, snapshots, true /* collapse_deletions */));
    return wrapped_range_del_agg.get();
  }

  // Only valid after DelegateToRangeDelAggregator() has been called.
  std::unique_ptr<RangeDelIterator> NewIterator() {
    assert(wrapped_range_del_agg != nullptr);
    return wrapped_range_del_agg->NewIterator();
  }

 private:
  const InternalKeyComparator* icmp_;

  std::vector<std::unique_ptr<TruncatedRangeDelIterator>> iters_;
  std::set<uint64_t> files_seen_;

  ForwardRangeDelIterator forward_iter_;
  ReverseRangeDelIterator reverse_iter_;

  // TODO: remove once V2 supports exposing tombstone iterators
  std::unique_ptr<RangeDelAggregator> wrapped_range_del_agg;
};
|
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
@ -1,469 +0,0 @@
|
|||||||
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
|
|
||||||
// This source code is licensed under both the GPLv2 (found in the
|
|
||||||
// COPYING file in the root directory) and Apache 2.0 License
|
|
||||||
// (found in the LICENSE.Apache file in the root directory).
|
|
||||||
|
|
||||||
#include "db/range_del_aggregator_v2.h"
|
|
||||||
|
|
||||||
#include <memory>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "db/db_test_util.h"
|
|
||||||
#include "db/dbformat.h"
|
|
||||||
#include "db/range_tombstone_fragmenter.h"
|
|
||||||
#include "util/testutil.h"
|
|
||||||
|
|
||||||
namespace rocksdb {
|
|
||||||
|
|
||||||
// Test fixture for RangeDelAggregatorV2 and its supporting iterators.
class RangeDelAggregatorV2Test : public testing::Test {};
|
|
||||||
|
|
||||||
namespace {

static auto bytewise_icmp = InternalKeyComparator(BytewiseComparator());

// Builds an in-memory iterator over serialized range tombstones, suitable as
// input to FragmentedRangeTombstoneList.
std::unique_ptr<InternalIterator> MakeRangeDelIter(
    const std::vector<RangeTombstone>& range_dels) {
  std::vector<std::string> keys, values;
  for (const auto& range_del : range_dels) {
    auto key_and_value = range_del.Serialize();
    keys.push_back(key_and_value.first.Encode().ToString());
    values.push_back(key_and_value.second.ToString());
  }
  return std::unique_ptr<test::VectorIterator>(
      new test::VectorIterator(keys, values));
}

// Fragments each tombstone vector into its own FragmentedRangeTombstoneList.
std::vector<std::unique_ptr<FragmentedRangeTombstoneList>>
MakeFragmentedTombstoneLists(
    const std::vector<std::vector<RangeTombstone>>& range_dels_list) {
  std::vector<std::unique_ptr<FragmentedRangeTombstoneList>> fragment_lists;
  for (const auto& range_dels : range_dels_list) {
    auto range_del_iter = MakeRangeDelIter(range_dels);
    fragment_lists.emplace_back(new FragmentedRangeTombstoneList(
        std::move(range_del_iter), bytewise_icmp));
  }
  return fragment_lists;
}

// One expected tombstone fragment during a full forward/reverse scan.
struct TruncatedIterScanTestCase {
  ParsedInternalKey start;
  ParsedInternalKey end;
  SequenceNumber seq;
};

// Expected iterator position after Seek/SeekForPrev on `target`; `invalid`
// means the iterator should not be positioned at all.
struct TruncatedIterSeekTestCase {
  Slice target;
  ParsedInternalKey start;
  ParsedInternalKey end;
  SequenceNumber seq;
  bool invalid;
};

// Expected ShouldDelete() result for a single lookup key.
struct ShouldDeleteTestCase {
  ParsedInternalKey lookup_key;
  bool result;
};

// Expected IsRangeOverlapped() result for a user-key range.
struct IsRangeOverlappedTestCase {
  Slice start;
  Slice end;
  bool result;
};

// An untruncated tombstone endpoint: user key with kMaxSequenceNumber.
ParsedInternalKey UncutEndpoint(const Slice& s) {
  return ParsedInternalKey(s, kMaxSequenceNumber, kTypeRangeDeletion);
}

// A point-value internal key used for truncation bounds and lookups.
ParsedInternalKey InternalValue(const Slice& key, SequenceNumber seq) {
  return ParsedInternalKey(key, seq, kTypeValue);
}

// Full scan check: forward then reverse, comparing every fragment's
// start/end/seq against the expected list.
void VerifyIterator(
    TruncatedRangeDelIterator* iter, const InternalKeyComparator& icmp,
    const std::vector<TruncatedIterScanTestCase>& expected_range_dels) {
  // Test forward iteration.
  iter->SeekToFirst();
  for (size_t i = 0; i < expected_range_dels.size(); i++, iter->Next()) {
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(0, icmp.Compare(iter->start_key(), expected_range_dels[i].start));
    EXPECT_EQ(0, icmp.Compare(iter->end_key(), expected_range_dels[i].end));
    EXPECT_EQ(expected_range_dels[i].seq, iter->seq());
  }
  EXPECT_FALSE(iter->Valid());

  // Test reverse iteration.
  iter->SeekToLast();
  std::vector<TruncatedIterScanTestCase> reverse_expected_range_dels(
      expected_range_dels.rbegin(), expected_range_dels.rend());
  for (size_t i = 0; i < reverse_expected_range_dels.size();
       i++, iter->Prev()) {
    ASSERT_TRUE(iter->Valid());
    EXPECT_EQ(0, icmp.Compare(iter->start_key(),
                              reverse_expected_range_dels[i].start));
    EXPECT_EQ(
        0, icmp.Compare(iter->end_key(), reverse_expected_range_dels[i].end));
    EXPECT_EQ(reverse_expected_range_dels[i].seq, iter->seq());
  }
  EXPECT_FALSE(iter->Valid());
}

// Checks Seek() results (position or invalidation) for each test case.
void VerifySeek(TruncatedRangeDelIterator* iter,
                const InternalKeyComparator& icmp,
                const std::vector<TruncatedIterSeekTestCase>& test_cases) {
  for (const auto& test_case : test_cases) {
    iter->Seek(test_case.target);
    if (test_case.invalid) {
      ASSERT_FALSE(iter->Valid());
    } else {
      ASSERT_TRUE(iter->Valid());
      EXPECT_EQ(0, icmp.Compare(iter->start_key(), test_case.start));
      EXPECT_EQ(0, icmp.Compare(iter->end_key(), test_case.end));
      EXPECT_EQ(test_case.seq, iter->seq());
    }
  }
}

// Checks SeekForPrev() results (position or invalidation) for each test case.
void VerifySeekForPrev(
    TruncatedRangeDelIterator* iter, const InternalKeyComparator& icmp,
    const std::vector<TruncatedIterSeekTestCase>& test_cases) {
  for (const auto& test_case : test_cases) {
    iter->SeekForPrev(test_case.target);
    if (test_case.invalid) {
      ASSERT_FALSE(iter->Valid());
    } else {
      ASSERT_TRUE(iter->Valid());
      EXPECT_EQ(0, icmp.Compare(iter->start_key(), test_case.start));
      EXPECT_EQ(0, icmp.Compare(iter->end_key(), test_case.end));
      EXPECT_EQ(test_case.seq, iter->seq());
    }
  }
}

// Runs ShouldDelete() over the cases forward (kForwardTraversal), then in
// reverse order (kBackwardTraversal), exercising both directional views.
void VerifyShouldDelete(RangeDelAggregatorV2* range_del_agg,
                        const std::vector<ShouldDeleteTestCase>& test_cases) {
  for (const auto& test_case : test_cases) {
    EXPECT_EQ(
        test_case.result,
        range_del_agg->ShouldDelete(
            test_case.lookup_key, RangeDelPositioningMode::kForwardTraversal));
  }
  for (auto it = test_cases.rbegin(); it != test_cases.rend(); ++it) {
    const auto& test_case = *it;
    EXPECT_EQ(
        test_case.result,
        range_del_agg->ShouldDelete(
            test_case.lookup_key, RangeDelPositioningMode::kBackwardTraversal));
  }
}

// Checks IsRangeOverlapped() for each user-key range case.
void VerifyIsRangeOverlapped(
    RangeDelAggregatorV2* range_del_agg,
    const std::vector<IsRangeOverlappedTestCase>& test_cases) {
  for (const auto& test_case : test_cases) {
    EXPECT_EQ(test_case.result,
              range_del_agg->IsRangeOverlapped(test_case.start, test_case.end));
  }
}

}  // namespace
|
|
||||||
|
|
||||||
// A truncated iterator over an empty tombstone list is never valid.
TEST_F(RangeDelAggregatorV2Test, EmptyTruncatedIter) {
  auto range_del_iter = MakeRangeDelIter({});
  FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
                                             bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
      new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber,
                                           bytewise_icmp));

  TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr,
                                 nullptr);

  iter.SeekToFirst();
  ASSERT_FALSE(iter.Valid());

  iter.SeekToLast();
  ASSERT_FALSE(iter.Valid());
}
|
|
||||||
|
|
||||||
// With no truncation bounds and all tombstones visible, scan and seek should
// report the raw fragment endpoints.
TEST_F(RangeDelAggregatorV2Test, UntruncatedIter) {
  auto range_del_iter =
      MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}});
  FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
                                             bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
      new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber,
                                           bytewise_icmp));

  TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr,
                                 nullptr);

  VerifyIterator(&iter, bytewise_icmp,
                 {{UncutEndpoint("a"), UncutEndpoint("e"), 10},
                  {UncutEndpoint("e"), UncutEndpoint("g"), 8},
                  {UncutEndpoint("j"), UncutEndpoint("n"), 4}});

  VerifySeek(
      &iter, bytewise_icmp,
      {{"d", UncutEndpoint("a"), UncutEndpoint("e"), 10},
       {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"ia", UncutEndpoint("j"), UncutEndpoint("n"), 4},
       {"n", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */},
       {"", UncutEndpoint("a"), UncutEndpoint("e"), 10}});

  VerifySeekForPrev(
      &iter, bytewise_icmp,
      {{"d", UncutEndpoint("a"), UncutEndpoint("e"), 10},
       {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"ia", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"n", UncutEndpoint("j"), UncutEndpoint("n"), 4},
       {"", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}});
}
|
|
||||||
|
|
||||||
// With a snapshot at seqnum 9, the seq-10 tombstone is invisible: only the
// seq-8 and seq-4 fragments should be surfaced by scans and seeks.
TEST_F(RangeDelAggregatorV2Test, UntruncatedIterWithSnapshot) {
  auto range_del_iter =
      MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}});
  FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
                                             bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
      new FragmentedRangeTombstoneIterator(&fragment_list, 9 /* snapshot */,
                                           bytewise_icmp));

  TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr,
                                 nullptr);

  VerifyIterator(&iter, bytewise_icmp,
                 {{UncutEndpoint("e"), UncutEndpoint("g"), 8},
                  {UncutEndpoint("j"), UncutEndpoint("n"), 4}});

  VerifySeek(
      &iter, bytewise_icmp,
      {{"d", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"ia", UncutEndpoint("j"), UncutEndpoint("n"), 4},
       {"n", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */},
       {"", UncutEndpoint("e"), UncutEndpoint("g"), 8}});

  VerifySeekForPrev(
      &iter, bytewise_icmp,
      {{"d", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */},
       {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"ia", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"n", UncutEndpoint("j"), UncutEndpoint("n"), 4},
       {"", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}});
}
|
|
||||||
|
|
||||||
// With file bounds d@7 .. m@9, the first fragment's start is clamped to the
// smallest key and the last fragment's end is clamped to m@8 (largest with
// its sequence number reduced by 1, per the constructor's truncation rule).
TEST_F(RangeDelAggregatorV2Test, TruncatedIter) {
  auto range_del_iter =
      MakeRangeDelIter({{"a", "e", 10}, {"e", "g", 8}, {"j", "n", 4}});
  FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
                                             bytewise_icmp);
  std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
      new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber,
                                           bytewise_icmp));

  InternalKey smallest("d", 7, kTypeValue);
  InternalKey largest("m", 9, kTypeValue);
  TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp,
                                 &smallest, &largest);

  VerifyIterator(&iter, bytewise_icmp,
                 {{InternalValue("d", 7), UncutEndpoint("e"), 10},
                  {UncutEndpoint("e"), UncutEndpoint("g"), 8},
                  {UncutEndpoint("j"), InternalValue("m", 8), 4}});

  VerifySeek(
      &iter, bytewise_icmp,
      {{"d", InternalValue("d", 7), UncutEndpoint("e"), 10},
       {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"ia", UncutEndpoint("j"), InternalValue("m", 8), 4},
       {"n", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */},
       {"", InternalValue("d", 7), UncutEndpoint("e"), 10}});

  VerifySeekForPrev(
      &iter, bytewise_icmp,
      {{"d", InternalValue("d", 7), UncutEndpoint("e"), 10},
       {"e", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"ia", UncutEndpoint("e"), UncutEndpoint("g"), 8},
       {"n", UncutEndpoint("j"), InternalValue("m", 8), 4},
       {"", UncutEndpoint(""), UncutEndpoint(""), 0, true /* invalid */}});
}
|
|
||||||
|
|
||||||
TEST_F(RangeDelAggregatorV2Test, SingleIterInAggregator) {
|
|
||||||
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}});
|
|
||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
|
||||||
bytewise_icmp);
|
|
||||||
std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
|
|
||||||
new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber,
|
|
||||||
bytewise_icmp));
|
|
||||||
|
|
||||||
RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, kMaxSequenceNumber);
|
|
||||||
range_del_agg.AddTombstones(std::move(input_iter));
|
|
||||||
|
|
||||||
VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), false},
|
|
||||||
{InternalValue("b", 9), true},
|
|
||||||
{InternalValue("d", 9), true},
|
|
||||||
{InternalValue("e", 7), true},
|
|
||||||
{InternalValue("g", 7), false}});
|
|
||||||
|
|
||||||
VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false},
|
|
||||||
{"_", "a", true},
|
|
||||||
{"a", "c", true},
|
|
||||||
{"d", "f", true},
|
|
||||||
{"g", "l", false}});
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(RangeDelAggregatorV2Test, MultipleItersInAggregator) {
|
|
||||||
auto fragment_lists = MakeFragmentedTombstoneLists(
|
|
||||||
{{{"a", "e", 10}, {"c", "g", 8}},
|
|
||||||
{{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}});
|
|
||||||
|
|
||||||
RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, kMaxSequenceNumber);
|
|
||||||
for (const auto& fragment_list : fragment_lists) {
|
|
||||||
std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
|
|
||||||
new FragmentedRangeTombstoneIterator(
|
|
||||||
fragment_list.get(), kMaxSequenceNumber, bytewise_icmp));
|
|
||||||
range_del_agg.AddTombstones(std::move(input_iter));
|
|
||||||
}
|
|
||||||
|
|
||||||
VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), true},
|
|
||||||
{InternalValue("b", 19), false},
|
|
||||||
{InternalValue("b", 9), true},
|
|
||||||
{InternalValue("d", 9), true},
|
|
||||||
{InternalValue("e", 7), true},
|
|
||||||
{InternalValue("g", 7), false},
|
|
||||||
{InternalValue("h", 24), true},
|
|
||||||
{InternalValue("i", 24), false},
|
|
||||||
{InternalValue("ii", 14), true},
|
|
||||||
{InternalValue("j", 14), false}});
|
|
||||||
|
|
||||||
VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false},
|
|
||||||
{"_", "a", true},
|
|
||||||
{"a", "c", true},
|
|
||||||
{"d", "f", true},
|
|
||||||
{"g", "l", true},
|
|
||||||
{"x", "y", false}});
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(RangeDelAggregatorV2Test, MultipleItersInAggregatorWithUpperBound) {
|
|
||||||
auto fragment_lists = MakeFragmentedTombstoneLists(
|
|
||||||
{{{"a", "e", 10}, {"c", "g", 8}},
|
|
||||||
{{"a", "b", 20}, {"h", "i", 25}, {"ii", "j", 15}}});
|
|
||||||
|
|
||||||
RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, 19);
|
|
||||||
for (const auto& fragment_list : fragment_lists) {
|
|
||||||
std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
|
|
||||||
new FragmentedRangeTombstoneIterator(fragment_list.get(),
|
|
||||||
19 /* snapshot */, bytewise_icmp));
|
|
||||||
range_del_agg.AddTombstones(std::move(input_iter));
|
|
||||||
}
|
|
||||||
|
|
||||||
VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 19), false},
|
|
||||||
{InternalValue("a", 9), true},
|
|
||||||
{InternalValue("b", 9), true},
|
|
||||||
{InternalValue("d", 9), true},
|
|
||||||
{InternalValue("e", 7), true},
|
|
||||||
{InternalValue("g", 7), false},
|
|
||||||
{InternalValue("h", 24), false},
|
|
||||||
{InternalValue("i", 24), false},
|
|
||||||
{InternalValue("ii", 14), true},
|
|
||||||
{InternalValue("j", 14), false}});
|
|
||||||
|
|
||||||
VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false},
|
|
||||||
{"_", "a", true},
|
|
||||||
{"a", "c", true},
|
|
||||||
{"d", "f", true},
|
|
||||||
{"g", "l", true},
|
|
||||||
{"x", "y", false}});
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(RangeDelAggregatorV2Test, MultipleTruncatedItersInAggregator) {
|
|
||||||
auto fragment_lists = MakeFragmentedTombstoneLists(
|
|
||||||
{{{"a", "z", 10}}, {{"a", "z", 10}}, {{"a", "z", 10}}});
|
|
||||||
std::vector<std::pair<InternalKey, InternalKey>> iter_bounds = {
|
|
||||||
{InternalKey("a", 4, kTypeValue),
|
|
||||||
InternalKey("m", kMaxSequenceNumber, kTypeRangeDeletion)},
|
|
||||||
{InternalKey("m", 20, kTypeValue),
|
|
||||||
InternalKey("x", kMaxSequenceNumber, kTypeRangeDeletion)},
|
|
||||||
{InternalKey("x", 5, kTypeValue), InternalKey("zz", 30, kTypeValue)}};
|
|
||||||
|
|
||||||
RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, 19);
|
|
||||||
for (size_t i = 0; i < fragment_lists.size(); i++) {
|
|
||||||
const auto& fragment_list = fragment_lists[i];
|
|
||||||
const auto& bounds = iter_bounds[i];
|
|
||||||
std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
|
|
||||||
new FragmentedRangeTombstoneIterator(fragment_list.get(),
|
|
||||||
19 /* snapshot */, bytewise_icmp));
|
|
||||||
range_del_agg.AddTombstones(std::move(input_iter), &bounds.first,
|
|
||||||
&bounds.second);
|
|
||||||
}
|
|
||||||
|
|
||||||
VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 10), false},
|
|
||||||
{InternalValue("a", 9), false},
|
|
||||||
{InternalValue("a", 4), true},
|
|
||||||
{InternalValue("m", 10), false},
|
|
||||||
{InternalValue("m", 9), true},
|
|
||||||
{InternalValue("x", 10), false},
|
|
||||||
{InternalValue("x", 9), false},
|
|
||||||
{InternalValue("x", 5), true},
|
|
||||||
{InternalValue("z", 9), false}});
|
|
||||||
|
|
||||||
VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false},
|
|
||||||
{"_", "a", true},
|
|
||||||
{"a", "n", true},
|
|
||||||
{"l", "x", true},
|
|
||||||
{"w", "z", true},
|
|
||||||
{"zzz", "zz", false},
|
|
||||||
{"zz", "zzz", false}});
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(RangeDelAggregatorV2Test, MultipleTruncatedItersInAggregatorSameLevel) {
|
|
||||||
auto fragment_lists = MakeFragmentedTombstoneLists(
|
|
||||||
{{{"a", "z", 10}}, {{"a", "z", 10}}, {{"a", "z", 10}}});
|
|
||||||
std::vector<std::pair<InternalKey, InternalKey>> iter_bounds = {
|
|
||||||
{InternalKey("a", 4, kTypeValue),
|
|
||||||
InternalKey("m", kMaxSequenceNumber, kTypeRangeDeletion)},
|
|
||||||
{InternalKey("m", 20, kTypeValue),
|
|
||||||
InternalKey("x", kMaxSequenceNumber, kTypeRangeDeletion)},
|
|
||||||
{InternalKey("x", 5, kTypeValue), InternalKey("zz", 30, kTypeValue)}};
|
|
||||||
|
|
||||||
RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, 19);
|
|
||||||
|
|
||||||
auto add_iter_to_agg = [&](size_t i) {
|
|
||||||
std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter(
|
|
||||||
new FragmentedRangeTombstoneIterator(fragment_lists[i].get(),
|
|
||||||
19 /* snapshot */, bytewise_icmp));
|
|
||||||
range_del_agg.AddTombstones(std::move(input_iter), &iter_bounds[i].first,
|
|
||||||
&iter_bounds[i].second);
|
|
||||||
};
|
|
||||||
|
|
||||||
add_iter_to_agg(0);
|
|
||||||
VerifyShouldDelete(&range_del_agg, {{InternalValue("a", 10), false},
|
|
||||||
{InternalValue("a", 9), false},
|
|
||||||
{InternalValue("a", 4), true}});
|
|
||||||
|
|
||||||
add_iter_to_agg(1);
|
|
||||||
VerifyShouldDelete(&range_del_agg, {{InternalValue("m", 10), false},
|
|
||||||
{InternalValue("m", 9), true}});
|
|
||||||
|
|
||||||
add_iter_to_agg(2);
|
|
||||||
VerifyShouldDelete(&range_del_agg, {{InternalValue("x", 10), false},
|
|
||||||
{InternalValue("x", 9), false},
|
|
||||||
{InternalValue("x", 5), true},
|
|
||||||
{InternalValue("z", 9), false}});
|
|
||||||
|
|
||||||
VerifyIsRangeOverlapped(&range_del_agg, {{"", "_", false},
|
|
||||||
{"_", "a", true},
|
|
||||||
{"a", "n", true},
|
|
||||||
{"l", "x", true},
|
|
||||||
{"w", "z", true},
|
|
||||||
{"zzz", "zz", false},
|
|
||||||
{"zz", "zzz", false}});
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace rocksdb
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
::testing::InitGoogleTest(&argc, argv);
|
|
||||||
return RUN_ALL_TESTS();
|
|
||||||
}
|
|
@ -20,7 +20,8 @@ namespace rocksdb {
|
|||||||
|
|
||||||
FragmentedRangeTombstoneList::FragmentedRangeTombstoneList(
|
FragmentedRangeTombstoneList::FragmentedRangeTombstoneList(
|
||||||
std::unique_ptr<InternalIterator> unfragmented_tombstones,
|
std::unique_ptr<InternalIterator> unfragmented_tombstones,
|
||||||
const InternalKeyComparator& icmp) {
|
const InternalKeyComparator& icmp, bool for_compaction,
|
||||||
|
const std::vector<SequenceNumber>& snapshots) {
|
||||||
if (unfragmented_tombstones == nullptr) {
|
if (unfragmented_tombstones == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -43,7 +44,8 @@ FragmentedRangeTombstoneList::FragmentedRangeTombstoneList(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (is_sorted) {
|
if (is_sorted) {
|
||||||
FragmentTombstones(std::move(unfragmented_tombstones), icmp);
|
FragmentTombstones(std::move(unfragmented_tombstones), icmp, for_compaction,
|
||||||
|
snapshots);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -61,12 +63,13 @@ FragmentedRangeTombstoneList::FragmentedRangeTombstoneList(
|
|||||||
// VectorIterator implicitly sorts by key during construction.
|
// VectorIterator implicitly sorts by key during construction.
|
||||||
auto iter = std::unique_ptr<VectorIterator>(
|
auto iter = std::unique_ptr<VectorIterator>(
|
||||||
new VectorIterator(std::move(keys), std::move(values), &icmp));
|
new VectorIterator(std::move(keys), std::move(values), &icmp));
|
||||||
FragmentTombstones(std::move(iter), icmp);
|
FragmentTombstones(std::move(iter), icmp, for_compaction, snapshots);
|
||||||
}
|
}
|
||||||
|
|
||||||
void FragmentedRangeTombstoneList::FragmentTombstones(
|
void FragmentedRangeTombstoneList::FragmentTombstones(
|
||||||
std::unique_ptr<InternalIterator> unfragmented_tombstones,
|
std::unique_ptr<InternalIterator> unfragmented_tombstones,
|
||||||
const InternalKeyComparator& icmp) {
|
const InternalKeyComparator& icmp, bool for_compaction,
|
||||||
|
const std::vector<SequenceNumber>& snapshots) {
|
||||||
Slice cur_start_key(nullptr, 0);
|
Slice cur_start_key(nullptr, 0);
|
||||||
auto cmp = ParsedInternalKeyComparator(&icmp);
|
auto cmp = ParsedInternalKeyComparator(&icmp);
|
||||||
|
|
||||||
@ -117,10 +120,38 @@ void FragmentedRangeTombstoneList::FragmentTombstones(
|
|||||||
}
|
}
|
||||||
std::sort(seqnums_to_flush.begin(), seqnums_to_flush.end(),
|
std::sort(seqnums_to_flush.begin(), seqnums_to_flush.end(),
|
||||||
std::greater<SequenceNumber>());
|
std::greater<SequenceNumber>());
|
||||||
|
|
||||||
size_t start_idx = tombstone_seqs_.size();
|
size_t start_idx = tombstone_seqs_.size();
|
||||||
size_t end_idx = start_idx + seqnums_to_flush.size();
|
size_t end_idx = start_idx + seqnums_to_flush.size();
|
||||||
|
|
||||||
|
if (for_compaction) {
|
||||||
|
// Drop all tombstone seqnums that are not preserved by a snapshot.
|
||||||
|
SequenceNumber next_snapshot = kMaxSequenceNumber;
|
||||||
|
for (auto seq : seqnums_to_flush) {
|
||||||
|
if (seq <= next_snapshot) {
|
||||||
|
// This seqnum is visible by a lower snapshot.
|
||||||
|
tombstone_seqs_.push_back(seq);
|
||||||
|
seq_set_.insert(seq);
|
||||||
|
auto upper_bound_it =
|
||||||
|
std::lower_bound(snapshots.begin(), snapshots.end(), seq);
|
||||||
|
if (upper_bound_it == snapshots.begin()) {
|
||||||
|
// This seqnum is the topmost one visible by the earliest
|
||||||
|
// snapshot. None of the seqnums below it will be visible, so we
|
||||||
|
// can skip them.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
next_snapshot = *std::prev(upper_bound_it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
end_idx = tombstone_seqs_.size();
|
||||||
|
} else {
|
||||||
|
// The fragmentation is being done for reads, so preserve all seqnums.
|
||||||
tombstone_seqs_.insert(tombstone_seqs_.end(), seqnums_to_flush.begin(),
|
tombstone_seqs_.insert(tombstone_seqs_.end(), seqnums_to_flush.begin(),
|
||||||
seqnums_to_flush.end());
|
seqnums_to_flush.end());
|
||||||
|
seq_set_.insert(seqnums_to_flush.begin(), seqnums_to_flush.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(start_idx < end_idx);
|
||||||
tombstones_.emplace_back(cur_start_key, cur_end_key, start_idx, end_idx);
|
tombstones_.emplace_back(cur_start_key, cur_end_key, start_idx, end_idx);
|
||||||
|
|
||||||
cur_start_key = cur_end_key;
|
cur_start_key = cur_end_key;
|
||||||
@ -143,6 +174,11 @@ void FragmentedRangeTombstoneList::FragmentTombstones(
|
|||||||
const Slice& ikey = unfragmented_tombstones->key();
|
const Slice& ikey = unfragmented_tombstones->key();
|
||||||
Slice tombstone_start_key = ExtractUserKey(ikey);
|
Slice tombstone_start_key = ExtractUserKey(ikey);
|
||||||
SequenceNumber tombstone_seq = GetInternalKeySeqno(ikey);
|
SequenceNumber tombstone_seq = GetInternalKeySeqno(ikey);
|
||||||
|
if (!unfragmented_tombstones->IsKeyPinned()) {
|
||||||
|
pinned_slices_.emplace_back(tombstone_start_key.data(),
|
||||||
|
tombstone_start_key.size());
|
||||||
|
tombstone_start_key = pinned_slices_.back();
|
||||||
|
}
|
||||||
no_tombstones = false;
|
no_tombstones = false;
|
||||||
|
|
||||||
Slice tombstone_end_key = unfragmented_tombstones->value();
|
Slice tombstone_end_key = unfragmented_tombstones->value();
|
||||||
@ -157,13 +193,7 @@ void FragmentedRangeTombstoneList::FragmentTombstones(
|
|||||||
// this new start key.
|
// this new start key.
|
||||||
flush_current_tombstones(tombstone_start_key);
|
flush_current_tombstones(tombstone_start_key);
|
||||||
}
|
}
|
||||||
if (unfragmented_tombstones->IsKeyPinned()) {
|
|
||||||
cur_start_key = tombstone_start_key;
|
cur_start_key = tombstone_start_key;
|
||||||
} else {
|
|
||||||
pinned_slices_.emplace_back(tombstone_start_key.data(),
|
|
||||||
tombstone_start_key.size());
|
|
||||||
cur_start_key = pinned_slices_.back();
|
|
||||||
}
|
|
||||||
|
|
||||||
cur_end_keys.emplace(tombstone_end_key, tombstone_seq, kTypeRangeDeletion);
|
cur_end_keys.emplace(tombstone_end_key, tombstone_seq, kTypeRangeDeletion);
|
||||||
}
|
}
|
||||||
@ -178,33 +208,41 @@ void FragmentedRangeTombstoneList::FragmentTombstones(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool FragmentedRangeTombstoneList::ContainsRange(SequenceNumber lower,
|
||||||
|
SequenceNumber upper) const {
|
||||||
|
auto seq_it = seq_set_.lower_bound(lower);
|
||||||
|
return seq_it != seq_set_.end() && *seq_it <= upper;
|
||||||
|
}
|
||||||
|
|
||||||
FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator(
|
FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator(
|
||||||
const FragmentedRangeTombstoneList* tombstones, SequenceNumber snapshot,
|
const FragmentedRangeTombstoneList* tombstones,
|
||||||
const InternalKeyComparator& icmp)
|
const InternalKeyComparator& icmp, SequenceNumber _upper_bound,
|
||||||
|
SequenceNumber _lower_bound)
|
||||||
: tombstone_start_cmp_(icmp.user_comparator()),
|
: tombstone_start_cmp_(icmp.user_comparator()),
|
||||||
tombstone_end_cmp_(icmp.user_comparator()),
|
tombstone_end_cmp_(icmp.user_comparator()),
|
||||||
|
icmp_(&icmp),
|
||||||
ucmp_(icmp.user_comparator()),
|
ucmp_(icmp.user_comparator()),
|
||||||
tombstones_(tombstones),
|
tombstones_(tombstones),
|
||||||
snapshot_(snapshot) {
|
upper_bound_(_upper_bound),
|
||||||
|
lower_bound_(_lower_bound) {
|
||||||
assert(tombstones_ != nullptr);
|
assert(tombstones_ != nullptr);
|
||||||
pos_ = tombstones_->end();
|
Invalidate();
|
||||||
pinned_pos_ = tombstones_->end();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator(
|
FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator(
|
||||||
const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones,
|
const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones,
|
||||||
SequenceNumber snapshot, const InternalKeyComparator& icmp)
|
const InternalKeyComparator& icmp, SequenceNumber _upper_bound,
|
||||||
|
SequenceNumber _lower_bound)
|
||||||
: tombstone_start_cmp_(icmp.user_comparator()),
|
: tombstone_start_cmp_(icmp.user_comparator()),
|
||||||
tombstone_end_cmp_(icmp.user_comparator()),
|
tombstone_end_cmp_(icmp.user_comparator()),
|
||||||
|
icmp_(&icmp),
|
||||||
ucmp_(icmp.user_comparator()),
|
ucmp_(icmp.user_comparator()),
|
||||||
tombstones_ref_(tombstones),
|
tombstones_ref_(tombstones),
|
||||||
tombstones_(tombstones_ref_.get()),
|
tombstones_(tombstones_ref_.get()),
|
||||||
snapshot_(snapshot) {
|
upper_bound_(_upper_bound),
|
||||||
|
lower_bound_(_lower_bound) {
|
||||||
assert(tombstones_ != nullptr);
|
assert(tombstones_ != nullptr);
|
||||||
pos_ = tombstones_->end();
|
Invalidate();
|
||||||
seq_pos_ = tombstones_->seq_end();
|
|
||||||
pinned_pos_ = tombstones_->end();
|
|
||||||
pinned_seq_pos_ = tombstones_->seq_end();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void FragmentedRangeTombstoneIterator::SeekToFirst() {
|
void FragmentedRangeTombstoneIterator::SeekToFirst() {
|
||||||
@ -220,7 +258,7 @@ void FragmentedRangeTombstoneIterator::SeekToTopFirst() {
|
|||||||
pos_ = tombstones_->begin();
|
pos_ = tombstones_->begin();
|
||||||
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
||||||
tombstones_->seq_iter(pos_->seq_end_idx),
|
tombstones_->seq_iter(pos_->seq_end_idx),
|
||||||
snapshot_, std::greater<SequenceNumber>());
|
upper_bound_, std::greater<SequenceNumber>());
|
||||||
ScanForwardToVisibleTombstone();
|
ScanForwardToVisibleTombstone();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -237,7 +275,7 @@ void FragmentedRangeTombstoneIterator::SeekToTopLast() {
|
|||||||
pos_ = std::prev(tombstones_->end());
|
pos_ = std::prev(tombstones_->end());
|
||||||
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
||||||
tombstones_->seq_iter(pos_->seq_end_idx),
|
tombstones_->seq_iter(pos_->seq_end_idx),
|
||||||
snapshot_, std::greater<SequenceNumber>());
|
upper_bound_, std::greater<SequenceNumber>());
|
||||||
ScanBackwardToVisibleTombstone();
|
ScanBackwardToVisibleTombstone();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -270,7 +308,7 @@ void FragmentedRangeTombstoneIterator::SeekToCoveringTombstone(
|
|||||||
}
|
}
|
||||||
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
||||||
tombstones_->seq_iter(pos_->seq_end_idx),
|
tombstones_->seq_iter(pos_->seq_end_idx),
|
||||||
snapshot_, std::greater<SequenceNumber>());
|
upper_bound_, std::greater<SequenceNumber>());
|
||||||
}
|
}
|
||||||
|
|
||||||
void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone(
|
void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone(
|
||||||
@ -289,25 +327,28 @@ void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone(
|
|||||||
--pos_;
|
--pos_;
|
||||||
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
||||||
tombstones_->seq_iter(pos_->seq_end_idx),
|
tombstones_->seq_iter(pos_->seq_end_idx),
|
||||||
snapshot_, std::greater<SequenceNumber>());
|
upper_bound_, std::greater<SequenceNumber>());
|
||||||
}
|
}
|
||||||
|
|
||||||
void FragmentedRangeTombstoneIterator::ScanForwardToVisibleTombstone() {
|
void FragmentedRangeTombstoneIterator::ScanForwardToVisibleTombstone() {
|
||||||
while (pos_ != tombstones_->end() &&
|
while (pos_ != tombstones_->end() &&
|
||||||
seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) {
|
(seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) ||
|
||||||
|
*seq_pos_ < lower_bound_)) {
|
||||||
++pos_;
|
++pos_;
|
||||||
if (pos_ == tombstones_->end()) {
|
if (pos_ == tombstones_->end()) {
|
||||||
|
Invalidate();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
||||||
tombstones_->seq_iter(pos_->seq_end_idx),
|
tombstones_->seq_iter(pos_->seq_end_idx),
|
||||||
snapshot_, std::greater<SequenceNumber>());
|
upper_bound_, std::greater<SequenceNumber>());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void FragmentedRangeTombstoneIterator::ScanBackwardToVisibleTombstone() {
|
void FragmentedRangeTombstoneIterator::ScanBackwardToVisibleTombstone() {
|
||||||
while (pos_ != tombstones_->end() &&
|
while (pos_ != tombstones_->end() &&
|
||||||
seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) {
|
(seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) ||
|
||||||
|
*seq_pos_ < lower_bound_)) {
|
||||||
if (pos_ == tombstones_->begin()) {
|
if (pos_ == tombstones_->begin()) {
|
||||||
Invalidate();
|
Invalidate();
|
||||||
return;
|
return;
|
||||||
@ -315,7 +356,7 @@ void FragmentedRangeTombstoneIterator::ScanBackwardToVisibleTombstone() {
|
|||||||
--pos_;
|
--pos_;
|
||||||
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
||||||
tombstones_->seq_iter(pos_->seq_end_idx),
|
tombstones_->seq_iter(pos_->seq_end_idx),
|
||||||
snapshot_, std::greater<SequenceNumber>());
|
upper_bound_, std::greater<SequenceNumber>());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -333,14 +374,13 @@ void FragmentedRangeTombstoneIterator::TopNext() {
|
|||||||
}
|
}
|
||||||
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
||||||
tombstones_->seq_iter(pos_->seq_end_idx),
|
tombstones_->seq_iter(pos_->seq_end_idx),
|
||||||
snapshot_, std::greater<SequenceNumber>());
|
upper_bound_, std::greater<SequenceNumber>());
|
||||||
ScanForwardToVisibleTombstone();
|
ScanForwardToVisibleTombstone();
|
||||||
}
|
}
|
||||||
|
|
||||||
void FragmentedRangeTombstoneIterator::Prev() {
|
void FragmentedRangeTombstoneIterator::Prev() {
|
||||||
if (seq_pos_ == tombstones_->seq_begin()) {
|
if (seq_pos_ == tombstones_->seq_begin()) {
|
||||||
pos_ = tombstones_->end();
|
Invalidate();
|
||||||
seq_pos_ = tombstones_->seq_end();
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
--seq_pos_;
|
--seq_pos_;
|
||||||
@ -358,7 +398,7 @@ void FragmentedRangeTombstoneIterator::TopPrev() {
|
|||||||
--pos_;
|
--pos_;
|
||||||
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx),
|
||||||
tombstones_->seq_iter(pos_->seq_end_idx),
|
tombstones_->seq_iter(pos_->seq_end_idx),
|
||||||
snapshot_, std::greater<SequenceNumber>());
|
upper_bound_, std::greater<SequenceNumber>());
|
||||||
ScanBackwardToVisibleTombstone();
|
ScanBackwardToVisibleTombstone();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -372,4 +412,27 @@ SequenceNumber FragmentedRangeTombstoneIterator::MaxCoveringTombstoneSeqnum(
|
|||||||
return ValidPos() && ucmp_->Compare(start_key(), user_key) <= 0 ? seq() : 0;
|
return ValidPos() && ucmp_->Compare(start_key(), user_key) <= 0 ? seq() : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::map<SequenceNumber, std::unique_ptr<FragmentedRangeTombstoneIterator>>
|
||||||
|
FragmentedRangeTombstoneIterator::SplitBySnapshot(
|
||||||
|
const std::vector<SequenceNumber>& snapshots) {
|
||||||
|
std::map<SequenceNumber, std::unique_ptr<FragmentedRangeTombstoneIterator>>
|
||||||
|
splits;
|
||||||
|
SequenceNumber lower = 0;
|
||||||
|
SequenceNumber upper;
|
||||||
|
for (size_t i = 0; i <= snapshots.size(); i++) {
|
||||||
|
if (i >= snapshots.size()) {
|
||||||
|
upper = kMaxSequenceNumber;
|
||||||
|
} else {
|
||||||
|
upper = snapshots[i];
|
||||||
|
}
|
||||||
|
if (tombstones_->ContainsRange(lower, upper)) {
|
||||||
|
splits.emplace(upper, std::unique_ptr<FragmentedRangeTombstoneIterator>(
|
||||||
|
new FragmentedRangeTombstoneIterator(
|
||||||
|
tombstones_, *icmp_, upper, lower)));
|
||||||
|
}
|
||||||
|
lower = upper + 1;
|
||||||
|
}
|
||||||
|
return splits;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -38,7 +39,8 @@ struct FragmentedRangeTombstoneList {
|
|||||||
};
|
};
|
||||||
FragmentedRangeTombstoneList(
|
FragmentedRangeTombstoneList(
|
||||||
std::unique_ptr<InternalIterator> unfragmented_tombstones,
|
std::unique_ptr<InternalIterator> unfragmented_tombstones,
|
||||||
const InternalKeyComparator& icmp);
|
const InternalKeyComparator& icmp, bool for_compaction = false,
|
||||||
|
const std::vector<SequenceNumber>& snapshots = {});
|
||||||
|
|
||||||
std::vector<RangeTombstoneStack>::const_iterator begin() const {
|
std::vector<RangeTombstoneStack>::const_iterator begin() const {
|
||||||
return tombstones_.begin();
|
return tombstones_.begin();
|
||||||
@ -60,7 +62,11 @@ struct FragmentedRangeTombstoneList {
|
|||||||
return tombstone_seqs_.end();
|
return tombstone_seqs_.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool empty() const { return tombstones_.size() == 0; }
|
bool empty() const { return tombstones_.empty(); }
|
||||||
|
|
||||||
|
// Returns true if the stored tombstones contain with one with a sequence
|
||||||
|
// number in [lower, upper].
|
||||||
|
bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Given an ordered range tombstone iterator unfragmented_tombstones,
|
// Given an ordered range tombstone iterator unfragmented_tombstones,
|
||||||
@ -68,10 +74,12 @@ struct FragmentedRangeTombstoneList {
|
|||||||
// tombstones_ and tombstone_seqs_.
|
// tombstones_ and tombstone_seqs_.
|
||||||
void FragmentTombstones(
|
void FragmentTombstones(
|
||||||
std::unique_ptr<InternalIterator> unfragmented_tombstones,
|
std::unique_ptr<InternalIterator> unfragmented_tombstones,
|
||||||
const InternalKeyComparator& icmp);
|
const InternalKeyComparator& icmp, bool for_compaction,
|
||||||
|
const std::vector<SequenceNumber>& snapshots);
|
||||||
|
|
||||||
std::vector<RangeTombstoneStack> tombstones_;
|
std::vector<RangeTombstoneStack> tombstones_;
|
||||||
std::vector<SequenceNumber> tombstone_seqs_;
|
std::vector<SequenceNumber> tombstone_seqs_;
|
||||||
|
std::set<SequenceNumber> seq_set_;
|
||||||
std::list<std::string> pinned_slices_;
|
std::list<std::string> pinned_slices_;
|
||||||
PinnedIteratorsManager pinned_iters_mgr_;
|
PinnedIteratorsManager pinned_iters_mgr_;
|
||||||
};
|
};
|
||||||
@ -88,11 +96,13 @@ struct FragmentedRangeTombstoneList {
|
|||||||
class FragmentedRangeTombstoneIterator : public InternalIterator {
|
class FragmentedRangeTombstoneIterator : public InternalIterator {
|
||||||
public:
|
public:
|
||||||
FragmentedRangeTombstoneIterator(
|
FragmentedRangeTombstoneIterator(
|
||||||
const FragmentedRangeTombstoneList* tombstones, SequenceNumber snapshot,
|
const FragmentedRangeTombstoneList* tombstones,
|
||||||
const InternalKeyComparator& icmp);
|
const InternalKeyComparator& icmp, SequenceNumber upper_bound,
|
||||||
|
SequenceNumber lower_bound = 0);
|
||||||
FragmentedRangeTombstoneIterator(
|
FragmentedRangeTombstoneIterator(
|
||||||
const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones,
|
const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones,
|
||||||
SequenceNumber snapshot, const InternalKeyComparator& icmp);
|
const InternalKeyComparator& icmp, SequenceNumber upper_bound,
|
||||||
|
SequenceNumber lower_bound = 0);
|
||||||
|
|
||||||
void SeekToFirst() override;
|
void SeekToFirst() override;
|
||||||
void SeekToLast() override;
|
void SeekToLast() override;
|
||||||
@ -136,8 +146,7 @@ class FragmentedRangeTombstoneIterator : public InternalIterator {
|
|||||||
seq_pos_ = tombstones_->seq_end();
|
seq_pos_ = tombstones_->seq_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: implement properly
|
RangeTombstone Tombstone() const {
|
||||||
RangeTombstone tombstone() const {
|
|
||||||
return RangeTombstone(start_key(), end_key(), seq());
|
return RangeTombstone(start_key(), end_key(), seq());
|
||||||
}
|
}
|
||||||
Slice start_key() const { return pos_->start_key; }
|
Slice start_key() const { return pos_->start_key; }
|
||||||
@ -151,12 +160,24 @@ class FragmentedRangeTombstoneIterator : public InternalIterator {
|
|||||||
return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber,
|
return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber,
|
||||||
kTypeRangeDeletion);
|
kTypeRangeDeletion);
|
||||||
}
|
}
|
||||||
ParsedInternalKey internal_key() const {
|
|
||||||
return ParsedInternalKey(pos_->start_key, *seq_pos_, kTypeRangeDeletion);
|
|
||||||
}
|
|
||||||
|
|
||||||
SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key);
|
SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key);
|
||||||
|
|
||||||
|
// Splits the iterator into n+1 iterators (where n is the number of
|
||||||
|
// snapshots), each providing a view over a "stripe" of sequence numbers. The
|
||||||
|
// iterators are keyed by the upper bound of their ranges (the provided
|
||||||
|
// snapshots + kMaxSequenceNumber).
|
||||||
|
//
|
||||||
|
// NOTE: the iterators in the returned map are no longer valid if their
|
||||||
|
// parent iterator is deleted, since they do not modify the refcount of the
|
||||||
|
// underlying tombstone list. Therefore, this map should be deleted before
|
||||||
|
// the parent iterator.
|
||||||
|
std::map<SequenceNumber, std::unique_ptr<FragmentedRangeTombstoneIterator>>
|
||||||
|
SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);
|
||||||
|
|
||||||
|
SequenceNumber upper_bound() const { return upper_bound_; }
|
||||||
|
SequenceNumber lower_bound() const { return lower_bound_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack;
|
using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack;
|
||||||
|
|
||||||
@ -217,10 +238,12 @@ class FragmentedRangeTombstoneIterator : public InternalIterator {
|
|||||||
|
|
||||||
const RangeTombstoneStackStartComparator tombstone_start_cmp_;
|
const RangeTombstoneStackStartComparator tombstone_start_cmp_;
|
||||||
const RangeTombstoneStackEndComparator tombstone_end_cmp_;
|
const RangeTombstoneStackEndComparator tombstone_end_cmp_;
|
||||||
|
const InternalKeyComparator* icmp_;
|
||||||
const Comparator* ucmp_;
|
const Comparator* ucmp_;
|
||||||
std::shared_ptr<const FragmentedRangeTombstoneList> tombstones_ref_;
|
std::shared_ptr<const FragmentedRangeTombstoneList> tombstones_ref_;
|
||||||
const FragmentedRangeTombstoneList* tombstones_;
|
const FragmentedRangeTombstoneList* tombstones_;
|
||||||
SequenceNumber snapshot_;
|
SequenceNumber upper_bound_;
|
||||||
|
SequenceNumber lower_bound_;
|
||||||
std::vector<RangeTombstoneStack>::const_iterator pos_;
|
std::vector<RangeTombstoneStack>::const_iterator pos_;
|
||||||
std::vector<SequenceNumber>::const_iterator seq_pos_;
|
std::vector<SequenceNumber>::const_iterator seq_pos_;
|
||||||
mutable std::vector<RangeTombstoneStack>::const_iterator pinned_pos_;
|
mutable std::vector<RangeTombstoneStack>::const_iterator pinned_pos_;
|
||||||
|
@ -29,15 +29,26 @@ std::unique_ptr<InternalIterator> MakeRangeDelIter(
|
|||||||
new test::VectorIterator(keys, values));
|
new test::VectorIterator(keys, values));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CheckIterPosition(const RangeTombstone& tombstone,
|
||||||
|
const FragmentedRangeTombstoneIterator* iter) {
|
||||||
|
// Test InternalIterator interface.
|
||||||
|
EXPECT_EQ(tombstone.start_key_, ExtractUserKey(iter->key()));
|
||||||
|
EXPECT_EQ(tombstone.end_key_, iter->value());
|
||||||
|
EXPECT_EQ(tombstone.seq_, iter->seq());
|
||||||
|
|
||||||
|
// Test FragmentedRangeTombstoneIterator interface.
|
||||||
|
EXPECT_EQ(tombstone.start_key_, iter->start_key());
|
||||||
|
EXPECT_EQ(tombstone.end_key_, iter->end_key());
|
||||||
|
EXPECT_EQ(tombstone.seq_, GetInternalKeySeqno(iter->key()));
|
||||||
|
}
|
||||||
|
|
||||||
void VerifyFragmentedRangeDels(
|
void VerifyFragmentedRangeDels(
|
||||||
FragmentedRangeTombstoneIterator* iter,
|
FragmentedRangeTombstoneIterator* iter,
|
||||||
const std::vector<RangeTombstone>& expected_tombstones) {
|
const std::vector<RangeTombstone>& expected_tombstones) {
|
||||||
iter->SeekToFirst();
|
iter->SeekToFirst();
|
||||||
for (size_t i = 0; i < expected_tombstones.size() && iter->Valid();
|
for (size_t i = 0; i < expected_tombstones.size(); i++, iter->Next()) {
|
||||||
i++, iter->Next()) {
|
ASSERT_TRUE(iter->Valid());
|
||||||
EXPECT_EQ(iter->start_key(), expected_tombstones[i].start_key_);
|
CheckIterPosition(expected_tombstones[i], iter);
|
||||||
EXPECT_EQ(iter->value(), expected_tombstones[i].end_key_);
|
|
||||||
EXPECT_EQ(iter->seq(), expected_tombstones[i].seq_);
|
|
||||||
}
|
}
|
||||||
EXPECT_FALSE(iter->Valid());
|
EXPECT_FALSE(iter->Valid());
|
||||||
}
|
}
|
||||||
@ -46,11 +57,9 @@ void VerifyVisibleTombstones(
|
|||||||
FragmentedRangeTombstoneIterator* iter,
|
FragmentedRangeTombstoneIterator* iter,
|
||||||
const std::vector<RangeTombstone>& expected_tombstones) {
|
const std::vector<RangeTombstone>& expected_tombstones) {
|
||||||
iter->SeekToTopFirst();
|
iter->SeekToTopFirst();
|
||||||
for (size_t i = 0; i < expected_tombstones.size() && iter->Valid();
|
for (size_t i = 0; i < expected_tombstones.size(); i++, iter->TopNext()) {
|
||||||
i++, iter->TopNext()) {
|
ASSERT_TRUE(iter->Valid());
|
||||||
EXPECT_EQ(iter->start_key(), expected_tombstones[i].start_key_);
|
CheckIterPosition(expected_tombstones[i], iter);
|
||||||
EXPECT_EQ(iter->value(), expected_tombstones[i].end_key_);
|
|
||||||
EXPECT_EQ(iter->seq(), expected_tombstones[i].seq_);
|
|
||||||
}
|
}
|
||||||
EXPECT_FALSE(iter->Valid());
|
EXPECT_FALSE(iter->Valid());
|
||||||
}
|
}
|
||||||
@ -69,9 +78,7 @@ void VerifySeek(FragmentedRangeTombstoneIterator* iter,
|
|||||||
ASSERT_FALSE(iter->Valid());
|
ASSERT_FALSE(iter->Valid());
|
||||||
} else {
|
} else {
|
||||||
ASSERT_TRUE(iter->Valid());
|
ASSERT_TRUE(iter->Valid());
|
||||||
EXPECT_EQ(testcase.expected_position.start_key_, iter->start_key());
|
CheckIterPosition(testcase.expected_position, iter);
|
||||||
EXPECT_EQ(testcase.expected_position.end_key_, iter->value());
|
|
||||||
EXPECT_EQ(testcase.expected_position.seq_, iter->seq());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -84,9 +91,7 @@ void VerifySeekForPrev(FragmentedRangeTombstoneIterator* iter,
|
|||||||
ASSERT_FALSE(iter->Valid());
|
ASSERT_FALSE(iter->Valid());
|
||||||
} else {
|
} else {
|
||||||
ASSERT_TRUE(iter->Valid());
|
ASSERT_TRUE(iter->Valid());
|
||||||
EXPECT_EQ(testcase.expected_position.start_key_, iter->start_key());
|
CheckIterPosition(testcase.expected_position, iter);
|
||||||
EXPECT_EQ(testcase.expected_position.end_key_, iter->value());
|
|
||||||
EXPECT_EQ(testcase.expected_position.seq_, iter->seq());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -112,8 +117,10 @@ TEST_F(RangeTombstoneFragmenterTest, NonOverlappingTombstones) {
|
|||||||
|
|
||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
|
ASSERT_EQ(0, iter.lower_bound());
|
||||||
|
ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound());
|
||||||
VerifyFragmentedRangeDels(&iter, {{"a", "b", 10}, {"c", "d", 5}});
|
VerifyFragmentedRangeDels(&iter, {{"a", "b", 10}, {"c", "d", 5}});
|
||||||
VerifyMaxCoveringTombstoneSeqnum(&iter,
|
VerifyMaxCoveringTombstoneSeqnum(&iter,
|
||||||
{{"", 0}, {"a", 10}, {"b", 0}, {"c", 5}});
|
{{"", 0}, {"a", 10}, {"b", 0}, {"c", 5}});
|
||||||
@ -124,8 +131,10 @@ TEST_F(RangeTombstoneFragmenterTest, OverlappingTombstones) {
|
|||||||
|
|
||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
|
ASSERT_EQ(0, iter.lower_bound());
|
||||||
|
ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound());
|
||||||
VerifyFragmentedRangeDels(
|
VerifyFragmentedRangeDels(
|
||||||
&iter, {{"a", "c", 10}, {"c", "e", 15}, {"c", "e", 10}, {"e", "g", 15}});
|
&iter, {{"a", "c", 10}, {"c", "e", 15}, {"c", "e", 10}, {"e", "g", 15}});
|
||||||
VerifyMaxCoveringTombstoneSeqnum(&iter,
|
VerifyMaxCoveringTombstoneSeqnum(&iter,
|
||||||
@ -138,8 +147,10 @@ TEST_F(RangeTombstoneFragmenterTest, ContiguousTombstones) {
|
|||||||
|
|
||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
|
ASSERT_EQ(0, iter.lower_bound());
|
||||||
|
ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound());
|
||||||
VerifyFragmentedRangeDels(
|
VerifyFragmentedRangeDels(
|
||||||
&iter, {{"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}});
|
&iter, {{"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}});
|
||||||
VerifyMaxCoveringTombstoneSeqnum(&iter,
|
VerifyMaxCoveringTombstoneSeqnum(&iter,
|
||||||
@ -152,8 +163,10 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartAndEndKey) {
|
|||||||
|
|
||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
|
ASSERT_EQ(0, iter.lower_bound());
|
||||||
|
ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound());
|
||||||
VerifyFragmentedRangeDels(&iter,
|
VerifyFragmentedRangeDels(&iter,
|
||||||
{{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}});
|
{{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}});
|
||||||
VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"b", 10}, {"c", 0}});
|
VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"b", 10}, {"c", 0}});
|
||||||
@ -165,8 +178,10 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyDifferentEndKeys) {
|
|||||||
|
|
||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
|
ASSERT_EQ(0, iter.lower_bound());
|
||||||
|
ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound());
|
||||||
VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
|
VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
|
||||||
{"a", "c", 7},
|
{"a", "c", 7},
|
||||||
{"a", "c", 3},
|
{"a", "c", 3},
|
||||||
@ -186,8 +201,10 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyMixedEndKeys) {
|
|||||||
|
|
||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
|
ASSERT_EQ(0, iter.lower_bound());
|
||||||
|
ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound());
|
||||||
VerifyFragmentedRangeDels(&iter, {{"a", "c", 30},
|
VerifyFragmentedRangeDels(&iter, {{"a", "c", 30},
|
||||||
{"a", "c", 20},
|
{"a", "c", 20},
|
||||||
{"a", "c", 10},
|
{"a", "c", 10},
|
||||||
@ -211,16 +228,16 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) {
|
|||||||
|
|
||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
FragmentedRangeTombstoneIterator iter2(&fragment_list, 9 /* snapshot */,
|
FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
9 /* upper_bound */);
|
||||||
FragmentedRangeTombstoneIterator iter3(&fragment_list, 7 /* snapshot */,
|
FragmentedRangeTombstoneIterator iter3(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
7 /* upper_bound */);
|
||||||
FragmentedRangeTombstoneIterator iter4(&fragment_list, 5 /* snapshot */,
|
FragmentedRangeTombstoneIterator iter4(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
5 /* upper_bound */);
|
||||||
FragmentedRangeTombstoneIterator iter5(&fragment_list, 3 /* snapshot */,
|
FragmentedRangeTombstoneIterator iter5(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
3 /* upper_bound */);
|
||||||
for (auto* iter : {&iter1, &iter2, &iter3, &iter4, &iter5}) {
|
for (auto* iter : {&iter1, &iter2, &iter3, &iter4, &iter5}) {
|
||||||
VerifyFragmentedRangeDels(iter, {{"a", "c", 10},
|
VerifyFragmentedRangeDels(iter, {{"a", "c", 10},
|
||||||
{"c", "e", 10},
|
{"c", "e", 10},
|
||||||
@ -234,6 +251,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) {
|
|||||||
{"l", "n", 4}});
|
{"l", "n", 4}});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASSERT_EQ(0, iter1.lower_bound());
|
||||||
|
ASSERT_EQ(kMaxSequenceNumber, iter1.upper_bound());
|
||||||
VerifyVisibleTombstones(&iter1, {{"a", "c", 10},
|
VerifyVisibleTombstones(&iter1, {{"a", "c", 10},
|
||||||
{"c", "e", 10},
|
{"c", "e", 10},
|
||||||
{"e", "g", 8},
|
{"e", "g", 8},
|
||||||
@ -243,6 +262,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) {
|
|||||||
VerifyMaxCoveringTombstoneSeqnum(
|
VerifyMaxCoveringTombstoneSeqnum(
|
||||||
&iter1, {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}});
|
&iter1, {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}});
|
||||||
|
|
||||||
|
ASSERT_EQ(0, iter2.lower_bound());
|
||||||
|
ASSERT_EQ(9, iter2.upper_bound());
|
||||||
VerifyVisibleTombstones(&iter2, {{"c", "e", 8},
|
VerifyVisibleTombstones(&iter2, {{"c", "e", 8},
|
||||||
{"e", "g", 8},
|
{"e", "g", 8},
|
||||||
{"g", "i", 6},
|
{"g", "i", 6},
|
||||||
@ -251,6 +272,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) {
|
|||||||
VerifyMaxCoveringTombstoneSeqnum(
|
VerifyMaxCoveringTombstoneSeqnum(
|
||||||
&iter2, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}});
|
&iter2, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}});
|
||||||
|
|
||||||
|
ASSERT_EQ(0, iter3.lower_bound());
|
||||||
|
ASSERT_EQ(7, iter3.upper_bound());
|
||||||
VerifyVisibleTombstones(&iter3, {{"c", "e", 6},
|
VerifyVisibleTombstones(&iter3, {{"c", "e", 6},
|
||||||
{"e", "g", 6},
|
{"e", "g", 6},
|
||||||
{"g", "i", 6},
|
{"g", "i", 6},
|
||||||
@ -259,10 +282,14 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) {
|
|||||||
VerifyMaxCoveringTombstoneSeqnum(
|
VerifyMaxCoveringTombstoneSeqnum(
|
||||||
&iter3, {{"a", 0}, {"c", 6}, {"e", 6}, {"i", 0}, {"j", 4}, {"m", 4}});
|
&iter3, {{"a", 0}, {"c", 6}, {"e", 6}, {"i", 0}, {"j", 4}, {"m", 4}});
|
||||||
|
|
||||||
|
ASSERT_EQ(0, iter4.lower_bound());
|
||||||
|
ASSERT_EQ(5, iter4.upper_bound());
|
||||||
VerifyVisibleTombstones(&iter4, {{"j", "l", 4}, {"l", "n", 4}});
|
VerifyVisibleTombstones(&iter4, {{"j", "l", 4}, {"l", "n", 4}});
|
||||||
VerifyMaxCoveringTombstoneSeqnum(
|
VerifyMaxCoveringTombstoneSeqnum(
|
||||||
&iter4, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 4}, {"m", 4}});
|
&iter4, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 4}, {"m", 4}});
|
||||||
|
|
||||||
|
ASSERT_EQ(0, iter5.lower_bound());
|
||||||
|
ASSERT_EQ(3, iter5.upper_bound());
|
||||||
VerifyVisibleTombstones(&iter5, {{"j", "l", 2}});
|
VerifyVisibleTombstones(&iter5, {{"j", "l", 2}});
|
||||||
VerifyMaxCoveringTombstoneSeqnum(
|
VerifyMaxCoveringTombstoneSeqnum(
|
||||||
&iter5, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 2}, {"m", 0}});
|
&iter5, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 2}, {"m", 0}});
|
||||||
@ -277,8 +304,10 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) {
|
|||||||
|
|
||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
FragmentedRangeTombstoneIterator iter(&fragment_list, 9 /* snapshot */,
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
9 /* upper_bound */);
|
||||||
|
ASSERT_EQ(0, iter.lower_bound());
|
||||||
|
ASSERT_EQ(9, iter.upper_bound());
|
||||||
VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
|
VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
|
||||||
{"c", "e", 10},
|
{"c", "e", 10},
|
||||||
{"c", "e", 8},
|
{"c", "e", 8},
|
||||||
@ -293,6 +322,116 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) {
|
|||||||
&iter, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}});
|
&iter, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyForCompaction) {
|
||||||
|
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
|
||||||
|
{"j", "n", 4},
|
||||||
|
{"c", "i", 6},
|
||||||
|
{"c", "g", 8},
|
||||||
|
{"j", "l", 2}});
|
||||||
|
|
||||||
|
FragmentedRangeTombstoneList fragment_list(
|
||||||
|
std::move(range_del_iter), bytewise_icmp, true /* for_compaction */,
|
||||||
|
{} /* snapshots */);
|
||||||
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
|
VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
|
||||||
|
{"c", "e", 10},
|
||||||
|
{"e", "g", 8},
|
||||||
|
{"g", "i", 6},
|
||||||
|
{"j", "l", 4},
|
||||||
|
{"l", "n", 4}});
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(RangeTombstoneFragmenterTest,
|
||||||
|
OverlapAndRepeatedStartKeyForCompactionWithSnapshot) {
|
||||||
|
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
|
||||||
|
{"j", "n", 4},
|
||||||
|
{"c", "i", 6},
|
||||||
|
{"c", "g", 8},
|
||||||
|
{"j", "l", 2}});
|
||||||
|
|
||||||
|
FragmentedRangeTombstoneList fragment_list(
|
||||||
|
std::move(range_del_iter), bytewise_icmp, true /* for_compaction */,
|
||||||
|
{20, 9} /* upper_bounds */);
|
||||||
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
|
VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
|
||||||
|
{"c", "e", 10},
|
||||||
|
{"c", "e", 8},
|
||||||
|
{"e", "g", 8},
|
||||||
|
{"g", "i", 6},
|
||||||
|
{"j", "l", 4},
|
||||||
|
{"l", "n", 4}});
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(RangeTombstoneFragmenterTest, IteratorSplitNoSnapshots) {
|
||||||
|
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
|
||||||
|
{"j", "n", 4},
|
||||||
|
{"c", "i", 6},
|
||||||
|
{"c", "g", 8},
|
||||||
|
{"j", "l", 2}});
|
||||||
|
|
||||||
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
|
bytewise_icmp);
|
||||||
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
|
|
||||||
|
auto split_iters = iter.SplitBySnapshot({} /* snapshots */);
|
||||||
|
ASSERT_EQ(1, split_iters.size());
|
||||||
|
|
||||||
|
auto* split_iter = split_iters[kMaxSequenceNumber].get();
|
||||||
|
ASSERT_EQ(0, split_iter->lower_bound());
|
||||||
|
ASSERT_EQ(kMaxSequenceNumber, split_iter->upper_bound());
|
||||||
|
VerifyVisibleTombstones(split_iter, {{"a", "c", 10},
|
||||||
|
{"c", "e", 10},
|
||||||
|
{"e", "g", 8},
|
||||||
|
{"g", "i", 6},
|
||||||
|
{"j", "l", 4},
|
||||||
|
{"l", "n", 4}});
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(RangeTombstoneFragmenterTest, IteratorSplitWithSnapshots) {
|
||||||
|
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
|
||||||
|
{"j", "n", 4},
|
||||||
|
{"c", "i", 6},
|
||||||
|
{"c", "g", 8},
|
||||||
|
{"j", "l", 2}});
|
||||||
|
|
||||||
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
|
bytewise_icmp);
|
||||||
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
|
|
||||||
|
auto split_iters = iter.SplitBySnapshot({3, 5, 7, 9} /* snapshots */);
|
||||||
|
ASSERT_EQ(5, split_iters.size());
|
||||||
|
|
||||||
|
auto* split_iter1 = split_iters[3].get();
|
||||||
|
ASSERT_EQ(0, split_iter1->lower_bound());
|
||||||
|
ASSERT_EQ(3, split_iter1->upper_bound());
|
||||||
|
VerifyVisibleTombstones(split_iter1, {{"j", "l", 2}});
|
||||||
|
|
||||||
|
auto* split_iter2 = split_iters[5].get();
|
||||||
|
ASSERT_EQ(4, split_iter2->lower_bound());
|
||||||
|
ASSERT_EQ(5, split_iter2->upper_bound());
|
||||||
|
VerifyVisibleTombstones(split_iter2, {{"j", "l", 4}, {"l", "n", 4}});
|
||||||
|
|
||||||
|
auto* split_iter3 = split_iters[7].get();
|
||||||
|
ASSERT_EQ(6, split_iter3->lower_bound());
|
||||||
|
ASSERT_EQ(7, split_iter3->upper_bound());
|
||||||
|
VerifyVisibleTombstones(split_iter3,
|
||||||
|
{{"c", "e", 6}, {"e", "g", 6}, {"g", "i", 6}});
|
||||||
|
|
||||||
|
auto* split_iter4 = split_iters[9].get();
|
||||||
|
ASSERT_EQ(8, split_iter4->lower_bound());
|
||||||
|
ASSERT_EQ(9, split_iter4->upper_bound());
|
||||||
|
VerifyVisibleTombstones(split_iter4, {{"c", "e", 8}, {"e", "g", 8}});
|
||||||
|
|
||||||
|
auto* split_iter5 = split_iters[kMaxSequenceNumber].get();
|
||||||
|
ASSERT_EQ(10, split_iter5->lower_bound());
|
||||||
|
ASSERT_EQ(kMaxSequenceNumber, split_iter5->upper_bound());
|
||||||
|
VerifyVisibleTombstones(split_iter5, {{"a", "c", 10}, {"c", "e", 10}});
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) {
|
TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) {
|
||||||
// Same tombstones as OverlapAndRepeatedStartKey.
|
// Same tombstones as OverlapAndRepeatedStartKey.
|
||||||
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
|
auto range_del_iter = MakeRangeDelIter({{"a", "e", 10},
|
||||||
@ -304,8 +443,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) {
|
|||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
|
|
||||||
FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
VerifySeek(
|
VerifySeek(
|
||||||
&iter1,
|
&iter1,
|
||||||
{{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}});
|
{{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}});
|
||||||
@ -313,8 +452,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) {
|
|||||||
&iter1,
|
&iter1,
|
||||||
{{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}});
|
{{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}});
|
||||||
|
|
||||||
FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */,
|
FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
3 /* upper_bound */);
|
||||||
VerifySeek(&iter2, {{"a", {"j", "l", 2}},
|
VerifySeek(&iter2, {{"a", {"j", "l", 2}},
|
||||||
{"e", {"j", "l", 2}},
|
{"e", {"j", "l", 2}},
|
||||||
{"l", {}, true /* out of range */}});
|
{"l", {}, true /* out of range */}});
|
||||||
@ -334,8 +473,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekCovered) {
|
|||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
|
|
||||||
FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
VerifySeek(
|
VerifySeek(
|
||||||
&iter1,
|
&iter1,
|
||||||
{{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}});
|
{{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}});
|
||||||
@ -343,8 +482,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekCovered) {
|
|||||||
&iter1,
|
&iter1,
|
||||||
{{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}});
|
{{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}});
|
||||||
|
|
||||||
FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */,
|
FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
3 /* upper_bound */);
|
||||||
VerifySeek(&iter2, {{"b", {"j", "l", 2}},
|
VerifySeek(&iter2, {{"b", {"j", "l", 2}},
|
||||||
{"f", {"j", "l", 2}},
|
{"f", {"j", "l", 2}},
|
||||||
{"m", {}, true /* out of range */}});
|
{"m", {}, true /* out of range */}});
|
||||||
@ -364,8 +503,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) {
|
|||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
|
|
||||||
FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
VerifySeek(&iter1, {{"c", {"c", "e", 10}},
|
VerifySeek(&iter1, {{"c", {"c", "e", 10}},
|
||||||
{"g", {"g", "i", 6}},
|
{"g", {"g", "i", 6}},
|
||||||
{"i", {"j", "l", 4}},
|
{"i", {"j", "l", 4}},
|
||||||
@ -375,8 +514,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) {
|
|||||||
{"i", {"g", "i", 6}},
|
{"i", {"g", "i", 6}},
|
||||||
{"n", {"l", "n", 4}}});
|
{"n", {"l", "n", 4}}});
|
||||||
|
|
||||||
FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */,
|
FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
3 /* upper_bound */);
|
||||||
VerifySeek(&iter2, {{"c", {"j", "l", 2}},
|
VerifySeek(&iter2, {{"c", {"j", "l", 2}},
|
||||||
{"g", {"j", "l", 2}},
|
{"g", {"j", "l", 2}},
|
||||||
{"i", {"j", "l", 2}},
|
{"i", {"j", "l", 2}},
|
||||||
@ -398,8 +537,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekOutOfBounds) {
|
|||||||
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter),
|
||||||
bytewise_icmp);
|
bytewise_icmp);
|
||||||
|
|
||||||
FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber,
|
FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
|
||||||
bytewise_icmp);
|
kMaxSequenceNumber);
|
||||||
VerifySeek(&iter, {{"", {"a", "c", 10}}, {"z", {}, true /* out of range */}});
|
VerifySeek(&iter, {{"", {"a", "c", 10}}, {"z", {}, true /* out of range */}});
|
||||||
VerifySeekForPrev(&iter,
|
VerifySeekForPrev(&iter,
|
||||||
{{"", {}, true /* out of range */}, {"z", {"l", "n", 4}}});
|
{{"", {}, true /* out of range */}, {"z", {"l", "n", 4}}});
|
||||||
|
11
db/repair.cc
11
db/repair.cc
@ -417,11 +417,16 @@ class Repairer {
|
|||||||
SnapshotChecker* snapshot_checker = DisableGCSnapshotChecker::Instance();
|
SnapshotChecker* snapshot_checker = DisableGCSnapshotChecker::Instance();
|
||||||
|
|
||||||
auto write_hint = cfd->CalculateSSTWriteHint(0);
|
auto write_hint = cfd->CalculateSSTWriteHint(0);
|
||||||
|
std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
|
||||||
|
range_del_iters;
|
||||||
|
auto range_del_iter =
|
||||||
|
mem->NewRangeTombstoneIterator(ro, kMaxSequenceNumber);
|
||||||
|
if (range_del_iter != nullptr) {
|
||||||
|
range_del_iters.emplace_back(range_del_iter);
|
||||||
|
}
|
||||||
status = BuildTable(
|
status = BuildTable(
|
||||||
dbname_, env_, *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(),
|
dbname_, env_, *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(),
|
||||||
env_options_, table_cache_, iter.get(),
|
env_options_, table_cache_, iter.get(), std::move(range_del_iters),
|
||||||
std::unique_ptr<InternalIterator>(
|
|
||||||
mem->NewRangeTombstoneIterator(ro, vset_.LastSequence())),
|
|
||||||
&meta, cfd->internal_comparator(),
|
&meta, cfd->internal_comparator(),
|
||||||
cfd->int_tbl_prop_collector_factories(), cfd->GetID(), cfd->GetName(),
|
cfd->int_tbl_prop_collector_factories(), cfd->GetID(), cfd->GetName(),
|
||||||
{}, kMaxSequenceNumber, snapshot_checker, kNoCompression,
|
{}, kMaxSequenceNumber, snapshot_checker, kNoCompression,
|
||||||
|
@ -185,7 +185,7 @@ Status TableCache::FindTable(const EnvOptions& env_options,
|
|||||||
InternalIterator* TableCache::NewIterator(
|
InternalIterator* TableCache::NewIterator(
|
||||||
const ReadOptions& options, const EnvOptions& env_options,
|
const ReadOptions& options, const EnvOptions& env_options,
|
||||||
const InternalKeyComparator& icomparator, const FileMetaData& file_meta,
|
const InternalKeyComparator& icomparator, const FileMetaData& file_meta,
|
||||||
RangeDelAggregatorV2* range_del_agg, const SliceTransform* prefix_extractor,
|
RangeDelAggregator* range_del_agg, const SliceTransform* prefix_extractor,
|
||||||
TableReader** table_reader_ptr, HistogramImpl* file_read_hist,
|
TableReader** table_reader_ptr, HistogramImpl* file_read_hist,
|
||||||
bool for_compaction, Arena* arena, bool skip_filters, int level,
|
bool for_compaction, Arena* arena, bool skip_filters, int level,
|
||||||
const InternalKey* smallest_compaction_key,
|
const InternalKey* smallest_compaction_key,
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "db/range_del_aggregator_v2.h"
|
#include "db/range_del_aggregator.h"
|
||||||
#include "options/cf_options.h"
|
#include "options/cf_options.h"
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
#include "rocksdb/cache.h"
|
#include "rocksdb/cache.h"
|
||||||
@ -52,7 +52,7 @@ class TableCache {
|
|||||||
InternalIterator* NewIterator(
|
InternalIterator* NewIterator(
|
||||||
const ReadOptions& options, const EnvOptions& toptions,
|
const ReadOptions& options, const EnvOptions& toptions,
|
||||||
const InternalKeyComparator& internal_comparator,
|
const InternalKeyComparator& internal_comparator,
|
||||||
const FileMetaData& file_meta, RangeDelAggregatorV2* range_del_agg,
|
const FileMetaData& file_meta, RangeDelAggregator* range_del_agg,
|
||||||
const SliceTransform* prefix_extractor = nullptr,
|
const SliceTransform* prefix_extractor = nullptr,
|
||||||
TableReader** table_reader_ptr = nullptr,
|
TableReader** table_reader_ptr = nullptr,
|
||||||
HistogramImpl* file_read_hist = nullptr, bool for_compaction = false,
|
HistogramImpl* file_read_hist = nullptr, bool for_compaction = false,
|
||||||
|
@ -301,17 +301,28 @@ class FilePicker {
|
|||||||
// On Level-n (n>=1), files are sorted. Binary search to find the
|
// On Level-n (n>=1), files are sorted. Binary search to find the
|
||||||
// earliest file whose largest key >= ikey. Search left bound and
|
// earliest file whose largest key >= ikey. Search left bound and
|
||||||
// right bound are used to narrow the range.
|
// right bound are used to narrow the range.
|
||||||
if (search_left_bound_ == search_right_bound_) {
|
if (search_left_bound_ <= search_right_bound_) {
|
||||||
start_index = search_left_bound_;
|
|
||||||
} else if (search_left_bound_ < search_right_bound_) {
|
|
||||||
if (search_right_bound_ == FileIndexer::kLevelMaxIndex) {
|
if (search_right_bound_ == FileIndexer::kLevelMaxIndex) {
|
||||||
search_right_bound_ =
|
search_right_bound_ =
|
||||||
static_cast<int32_t>(curr_file_level_->num_files) - 1;
|
static_cast<int32_t>(curr_file_level_->num_files) - 1;
|
||||||
}
|
}
|
||||||
|
// `search_right_bound_` is an inclusive upper-bound, but since it was
|
||||||
|
// determined based on user key, it is still possible the lookup key
|
||||||
|
// falls to the right of `search_right_bound_`'s corresponding file.
|
||||||
|
// So, pass a limit one higher, which allows us to detect this case.
|
||||||
start_index =
|
start_index =
|
||||||
FindFileInRange(*internal_comparator_, *curr_file_level_, ikey_,
|
FindFileInRange(*internal_comparator_, *curr_file_level_, ikey_,
|
||||||
static_cast<uint32_t>(search_left_bound_),
|
static_cast<uint32_t>(search_left_bound_),
|
||||||
static_cast<uint32_t>(search_right_bound_));
|
static_cast<uint32_t>(search_right_bound_) + 1);
|
||||||
|
if (start_index == search_right_bound_ + 1) {
|
||||||
|
// `ikey_` comes after `search_right_bound_`. The lookup key does
|
||||||
|
// not exist on this level, so let's skip this level and do a full
|
||||||
|
// binary search on the next level.
|
||||||
|
search_left_bound_ = 0;
|
||||||
|
search_right_bound_ = FileIndexer::kLevelMaxIndex;
|
||||||
|
curr_level_++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// search_left_bound > search_right_bound, key does not exist in
|
// search_left_bound > search_right_bound, key does not exist in
|
||||||
// this level. Since no comparison is done in this level, it will
|
// this level. Since no comparison is done in this level, it will
|
||||||
@ -459,7 +470,7 @@ class LevelIterator final : public InternalIterator {
|
|||||||
const EnvOptions& env_options, const InternalKeyComparator& icomparator,
|
const EnvOptions& env_options, const InternalKeyComparator& icomparator,
|
||||||
const LevelFilesBrief* flevel, const SliceTransform* prefix_extractor,
|
const LevelFilesBrief* flevel, const SliceTransform* prefix_extractor,
|
||||||
bool should_sample, HistogramImpl* file_read_hist, bool for_compaction,
|
bool should_sample, HistogramImpl* file_read_hist, bool for_compaction,
|
||||||
bool skip_filters, int level, RangeDelAggregatorV2* range_del_agg,
|
bool skip_filters, int level, RangeDelAggregator* range_del_agg,
|
||||||
const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
|
const std::vector<AtomicCompactionUnitBoundary>* compaction_boundaries =
|
||||||
nullptr)
|
nullptr)
|
||||||
: table_cache_(table_cache),
|
: table_cache_(table_cache),
|
||||||
@ -571,7 +582,7 @@ class LevelIterator final : public InternalIterator {
|
|||||||
bool skip_filters_;
|
bool skip_filters_;
|
||||||
size_t file_index_;
|
size_t file_index_;
|
||||||
int level_;
|
int level_;
|
||||||
RangeDelAggregatorV2* range_del_agg_;
|
RangeDelAggregator* range_del_agg_;
|
||||||
IteratorWrapper file_iter_; // May be nullptr
|
IteratorWrapper file_iter_; // May be nullptr
|
||||||
PinnedIteratorsManager* pinned_iters_mgr_;
|
PinnedIteratorsManager* pinned_iters_mgr_;
|
||||||
|
|
||||||
@ -985,7 +996,7 @@ double VersionStorageInfo::GetEstimatedCompressionRatioAtLevel(
|
|||||||
void Version::AddIterators(const ReadOptions& read_options,
|
void Version::AddIterators(const ReadOptions& read_options,
|
||||||
const EnvOptions& soptions,
|
const EnvOptions& soptions,
|
||||||
MergeIteratorBuilder* merge_iter_builder,
|
MergeIteratorBuilder* merge_iter_builder,
|
||||||
RangeDelAggregatorV2* range_del_agg) {
|
RangeDelAggregator* range_del_agg) {
|
||||||
assert(storage_info_.finalized_);
|
assert(storage_info_.finalized_);
|
||||||
|
|
||||||
for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) {
|
for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) {
|
||||||
@ -998,7 +1009,7 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options,
|
|||||||
const EnvOptions& soptions,
|
const EnvOptions& soptions,
|
||||||
MergeIteratorBuilder* merge_iter_builder,
|
MergeIteratorBuilder* merge_iter_builder,
|
||||||
int level,
|
int level,
|
||||||
RangeDelAggregatorV2* range_del_agg) {
|
RangeDelAggregator* range_del_agg) {
|
||||||
assert(storage_info_.finalized_);
|
assert(storage_info_.finalized_);
|
||||||
if (level >= storage_info_.num_non_empty_levels()) {
|
if (level >= storage_info_.num_non_empty_levels()) {
|
||||||
// This is an empty level
|
// This is an empty level
|
||||||
@ -1057,7 +1068,7 @@ Status Version::OverlapWithLevelIterator(const ReadOptions& read_options,
|
|||||||
|
|
||||||
Arena arena;
|
Arena arena;
|
||||||
Status status;
|
Status status;
|
||||||
RangeDelAggregatorV2 range_del_agg(&icmp,
|
ReadRangeDelAggregator range_del_agg(&icmp,
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
|
|
||||||
*overlap = false;
|
*overlap = false;
|
||||||
@ -2849,6 +2860,7 @@ Status VersionSet::ProcessManifestWrites(
|
|||||||
batch_edits.push_back(first_writer.edit_list.front());
|
batch_edits.push_back(first_writer.edit_list.front());
|
||||||
} else {
|
} else {
|
||||||
auto it = manifest_writers_.cbegin();
|
auto it = manifest_writers_.cbegin();
|
||||||
|
size_t group_start = std::numeric_limits<size_t>::max();
|
||||||
while (it != manifest_writers_.cend()) {
|
while (it != manifest_writers_.cend()) {
|
||||||
if ((*it)->edit_list.front()->IsColumnFamilyManipulation()) {
|
if ((*it)->edit_list.front()->IsColumnFamilyManipulation()) {
|
||||||
// no group commits for column family add or drop
|
// no group commits for column family add or drop
|
||||||
@ -2857,7 +2869,36 @@ Status VersionSet::ProcessManifestWrites(
|
|||||||
last_writer = *(it++);
|
last_writer = *(it++);
|
||||||
assert(last_writer != nullptr);
|
assert(last_writer != nullptr);
|
||||||
assert(last_writer->cfd != nullptr);
|
assert(last_writer->cfd != nullptr);
|
||||||
if (last_writer->cfd != nullptr && last_writer->cfd->IsDropped()) {
|
if (last_writer->cfd->IsDropped()) {
|
||||||
|
// If we detect a dropped CF at this point, and the corresponding
|
||||||
|
// version edits belong to an atomic group, then we need to find out
|
||||||
|
// the preceding version edits in the same atomic group, and update
|
||||||
|
// their `remaining_entries_` member variable because we are NOT going
|
||||||
|
// to write the version edits' of dropped CF to the MANIFEST. If we
|
||||||
|
// don't update, then Recover can report corrupted atomic group because
|
||||||
|
// the `remaining_entries_` do not match.
|
||||||
|
if (!batch_edits.empty()) {
|
||||||
|
if (batch_edits.back()->is_in_atomic_group_ &&
|
||||||
|
batch_edits.back()->remaining_entries_ > 0) {
|
||||||
|
assert(group_start < batch_edits.size());
|
||||||
|
const auto& edit_list = last_writer->edit_list;
|
||||||
|
size_t k = 0;
|
||||||
|
while (k < edit_list.size()) {
|
||||||
|
if (!edit_list[k]->is_in_atomic_group_) {
|
||||||
|
break;
|
||||||
|
} else if (edit_list[k]->remaining_entries_ == 0) {
|
||||||
|
++k;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++k;
|
||||||
|
}
|
||||||
|
for (auto i = group_start; i < batch_edits.size(); ++i) {
|
||||||
|
assert(static_cast<uint32_t>(k) <=
|
||||||
|
batch_edits.back()->remaining_entries_);
|
||||||
|
batch_edits[i]->remaining_entries_ -= static_cast<uint32_t>(k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// We do a linear search on versions because versions is small.
|
// We do a linear search on versions because versions is small.
|
||||||
@ -2888,6 +2929,15 @@ Status VersionSet::ProcessManifestWrites(
|
|||||||
}
|
}
|
||||||
assert(builder != nullptr); // make checker happy
|
assert(builder != nullptr); // make checker happy
|
||||||
for (const auto& e : last_writer->edit_list) {
|
for (const auto& e : last_writer->edit_list) {
|
||||||
|
if (e->is_in_atomic_group_) {
|
||||||
|
if (batch_edits.empty() || !batch_edits.back()->is_in_atomic_group_ ||
|
||||||
|
(batch_edits.back()->is_in_atomic_group_ &&
|
||||||
|
batch_edits.back()->remaining_entries_ == 0)) {
|
||||||
|
group_start = batch_edits.size();
|
||||||
|
}
|
||||||
|
} else if (group_start != std::numeric_limits<size_t>::max()) {
|
||||||
|
group_start = std::numeric_limits<size_t>::max();
|
||||||
|
}
|
||||||
LogAndApplyHelper(last_writer->cfd, builder, version, e, mu);
|
LogAndApplyHelper(last_writer->cfd, builder, version, e, mu);
|
||||||
batch_edits.push_back(e);
|
batch_edits.push_back(e);
|
||||||
}
|
}
|
||||||
@ -2900,6 +2950,42 @@ Status VersionSet::ProcessManifestWrites(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
// Verify that version edits of atomic groups have correct
|
||||||
|
// remaining_entries_.
|
||||||
|
size_t k = 0;
|
||||||
|
while (k < batch_edits.size()) {
|
||||||
|
while (k < batch_edits.size() && !batch_edits[k]->is_in_atomic_group_) {
|
||||||
|
++k;
|
||||||
|
}
|
||||||
|
if (k == batch_edits.size()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
size_t i = k;
|
||||||
|
while (i < batch_edits.size()) {
|
||||||
|
if (!batch_edits[i]->is_in_atomic_group_) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
assert(i - k + batch_edits[i]->remaining_entries_ ==
|
||||||
|
batch_edits[k]->remaining_entries_);
|
||||||
|
if (batch_edits[i]->remaining_entries_ == 0) {
|
||||||
|
++i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
assert(batch_edits[i - 1]->is_in_atomic_group_);
|
||||||
|
assert(0 == batch_edits[i - 1]->remaining_entries_);
|
||||||
|
std::vector<VersionEdit*> tmp;
|
||||||
|
for (size_t j = k; j != i; ++j) {
|
||||||
|
tmp.emplace_back(batch_edits[j]);
|
||||||
|
}
|
||||||
|
TEST_SYNC_POINT_CALLBACK(
|
||||||
|
"VersionSet::ProcessManifestWrites:CheckOneAtomicGroup", &tmp);
|
||||||
|
k = i;
|
||||||
|
}
|
||||||
|
#endif // NDEBUG
|
||||||
|
|
||||||
uint64_t new_manifest_file_size = 0;
|
uint64_t new_manifest_file_size = 0;
|
||||||
Status s;
|
Status s;
|
||||||
|
|
||||||
@ -3205,7 +3291,7 @@ Status VersionSet::LogAndApply(
|
|||||||
if (!manifest_writers_.empty()) {
|
if (!manifest_writers_.empty()) {
|
||||||
manifest_writers_.front()->cv.Signal();
|
manifest_writers_.front()->cv.Signal();
|
||||||
}
|
}
|
||||||
return Status::OK();
|
return Status::ShutdownInProgress();
|
||||||
}
|
}
|
||||||
|
|
||||||
return ProcessManifestWrites(writers, mu, db_directory, new_descriptor_log,
|
return ProcessManifestWrites(writers, mu, db_directory, new_descriptor_log,
|
||||||
@ -4253,7 +4339,7 @@ void VersionSet::AddLiveFiles(std::vector<FileDescriptor>* live_list) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
InternalIterator* VersionSet::MakeInputIterator(
|
InternalIterator* VersionSet::MakeInputIterator(
|
||||||
const Compaction* c, RangeDelAggregatorV2* range_del_agg,
|
const Compaction* c, RangeDelAggregator* range_del_agg,
|
||||||
const EnvOptions& env_options_compactions) {
|
const EnvOptions& env_options_compactions) {
|
||||||
auto cfd = c->column_family_data();
|
auto cfd = c->column_family_data();
|
||||||
ReadOptions read_options;
|
ReadOptions read_options;
|
||||||
|
@ -34,7 +34,7 @@
|
|||||||
#include "db/dbformat.h"
|
#include "db/dbformat.h"
|
||||||
#include "db/file_indexer.h"
|
#include "db/file_indexer.h"
|
||||||
#include "db/log_reader.h"
|
#include "db/log_reader.h"
|
||||||
#include "db/range_del_aggregator_v2.h"
|
#include "db/range_del_aggregator.h"
|
||||||
#include "db/read_callback.h"
|
#include "db/read_callback.h"
|
||||||
#include "db/table_cache.h"
|
#include "db/table_cache.h"
|
||||||
#include "db/version_builder.h"
|
#include "db/version_builder.h"
|
||||||
@ -538,11 +538,11 @@ class Version {
|
|||||||
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
|
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
|
||||||
void AddIterators(const ReadOptions&, const EnvOptions& soptions,
|
void AddIterators(const ReadOptions&, const EnvOptions& soptions,
|
||||||
MergeIteratorBuilder* merger_iter_builder,
|
MergeIteratorBuilder* merger_iter_builder,
|
||||||
RangeDelAggregatorV2* range_del_agg);
|
RangeDelAggregator* range_del_agg);
|
||||||
|
|
||||||
void AddIteratorsForLevel(const ReadOptions&, const EnvOptions& soptions,
|
void AddIteratorsForLevel(const ReadOptions&, const EnvOptions& soptions,
|
||||||
MergeIteratorBuilder* merger_iter_builder,
|
MergeIteratorBuilder* merger_iter_builder,
|
||||||
int level, RangeDelAggregatorV2* range_del_agg);
|
int level, RangeDelAggregator* range_del_agg);
|
||||||
|
|
||||||
Status OverlapWithLevelIterator(const ReadOptions&, const EnvOptions&,
|
Status OverlapWithLevelIterator(const ReadOptions&, const EnvOptions&,
|
||||||
const Slice& smallest_user_key,
|
const Slice& smallest_user_key,
|
||||||
@ -935,7 +935,7 @@ class VersionSet {
|
|||||||
// Create an iterator that reads over the compaction inputs for "*c".
|
// Create an iterator that reads over the compaction inputs for "*c".
|
||||||
// The caller should delete the iterator when no longer needed.
|
// The caller should delete the iterator when no longer needed.
|
||||||
InternalIterator* MakeInputIterator(
|
InternalIterator* MakeInputIterator(
|
||||||
const Compaction* c, RangeDelAggregatorV2* range_del_agg,
|
const Compaction* c, RangeDelAggregator* range_del_agg,
|
||||||
const EnvOptions& env_options_compactions);
|
const EnvOptions& env_options_compactions);
|
||||||
|
|
||||||
// Add all files listed in any live version to *live.
|
// Add all files listed in any live version to *live.
|
||||||
|
@ -605,9 +605,13 @@ TEST_F(FindLevelFileTest, LevelOverlappingFiles) {
|
|||||||
ASSERT_TRUE(Overlaps("600", "700"));
|
ASSERT_TRUE(Overlaps("600", "700"));
|
||||||
}
|
}
|
||||||
|
|
||||||
class VersionSetTest : public testing::Test {
|
class VersionSetTestBase {
|
||||||
public:
|
public:
|
||||||
VersionSetTest()
|
const static std::string kColumnFamilyName1;
|
||||||
|
const static std::string kColumnFamilyName2;
|
||||||
|
const static std::string kColumnFamilyName3;
|
||||||
|
|
||||||
|
VersionSetTestBase()
|
||||||
: env_(Env::Default()),
|
: env_(Env::Default()),
|
||||||
dbname_(test::PerThreadDBPath("version_set_test")),
|
dbname_(test::PerThreadDBPath("version_set_test")),
|
||||||
db_options_(),
|
db_options_(),
|
||||||
@ -635,8 +639,9 @@ class VersionSetTest : public testing::Test {
|
|||||||
new_db.SetNextFile(2);
|
new_db.SetNextFile(2);
|
||||||
new_db.SetLastSequence(0);
|
new_db.SetLastSequence(0);
|
||||||
|
|
||||||
const std::vector<std::string> cf_names = {kDefaultColumnFamilyName,
|
const std::vector<std::string> cf_names = {
|
||||||
"alice", "bob"};
|
kDefaultColumnFamilyName, kColumnFamilyName1, kColumnFamilyName2,
|
||||||
|
kColumnFamilyName3};
|
||||||
const int kInitialNumOfCfs = static_cast<int>(cf_names.size());
|
const int kInitialNumOfCfs = static_cast<int>(cf_names.size());
|
||||||
autovector<VersionEdit> new_cfs;
|
autovector<VersionEdit> new_cfs;
|
||||||
uint64_t last_seq = 1;
|
uint64_t last_seq = 1;
|
||||||
@ -711,6 +716,15 @@ class VersionSetTest : public testing::Test {
|
|||||||
std::shared_ptr<mock::MockTableFactory> mock_table_factory_;
|
std::shared_ptr<mock::MockTableFactory> mock_table_factory_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const std::string VersionSetTestBase::kColumnFamilyName1 = "alice";
|
||||||
|
const std::string VersionSetTestBase::kColumnFamilyName2 = "bob";
|
||||||
|
const std::string VersionSetTestBase::kColumnFamilyName3 = "charles";
|
||||||
|
|
||||||
|
class VersionSetTest : public VersionSetTestBase, public testing::Test {
|
||||||
|
public:
|
||||||
|
VersionSetTest() : VersionSetTestBase() {}
|
||||||
|
};
|
||||||
|
|
||||||
TEST_F(VersionSetTest, SameColumnFamilyGroupCommit) {
|
TEST_F(VersionSetTest, SameColumnFamilyGroupCommit) {
|
||||||
NewDB();
|
NewDB();
|
||||||
const int kGroupSize = 5;
|
const int kGroupSize = 5;
|
||||||
@ -958,6 +972,126 @@ TEST_F(VersionSetTest, HandleIncorrectAtomicGroupSize) {
|
|||||||
versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
|
versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
|
||||||
EXPECT_TRUE(incorrect_group_size);
|
EXPECT_TRUE(incorrect_group_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class VersionSetTestDropOneCF : public VersionSetTestBase,
|
||||||
|
public testing::TestWithParam<std::string> {
|
||||||
|
public:
|
||||||
|
VersionSetTestDropOneCF() : VersionSetTestBase() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
// This test simulates the following execution sequence
|
||||||
|
// Time thread1 bg_flush_thr
|
||||||
|
// | Prepare version edits (e1,e2,e3) for atomic
|
||||||
|
// | flush cf1, cf2, cf3
|
||||||
|
// | Enqueue e to drop cfi
|
||||||
|
// | to manifest_writers_
|
||||||
|
// | Enqueue (e1,e2,e3) to manifest_writers_
|
||||||
|
// |
|
||||||
|
// | Apply e,
|
||||||
|
// | cfi.IsDropped() is true
|
||||||
|
// | Apply (e1,e2,e3),
|
||||||
|
// | since cfi.IsDropped() == true, we need to
|
||||||
|
// | drop ei and write the rest to MANIFEST.
|
||||||
|
// V
|
||||||
|
//
|
||||||
|
// Repeat the test for i = 1, 2, 3 to simulate dropping the first, middle and
|
||||||
|
// last column family in an atomic group.
|
||||||
|
TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) {
|
||||||
|
std::vector<ColumnFamilyDescriptor> column_families;
|
||||||
|
SequenceNumber last_seqno;
|
||||||
|
std::unique_ptr<log::Writer> log_writer;
|
||||||
|
PrepareManifest(&column_families, &last_seqno, &log_writer);
|
||||||
|
Status s = SetCurrentFile(env_, dbname_, 1, nullptr);
|
||||||
|
ASSERT_OK(s);
|
||||||
|
|
||||||
|
EXPECT_OK(versions_->Recover(column_families, false /* read_only */));
|
||||||
|
EXPECT_EQ(column_families.size(),
|
||||||
|
versions_->GetColumnFamilySet()->NumberOfColumnFamilies());
|
||||||
|
|
||||||
|
const int kAtomicGroupSize = 3;
|
||||||
|
const std::vector<std::string> non_default_cf_names = {
|
||||||
|
kColumnFamilyName1, kColumnFamilyName2, kColumnFamilyName3};
|
||||||
|
|
||||||
|
// Drop one column family
|
||||||
|
VersionEdit drop_cf_edit;
|
||||||
|
drop_cf_edit.DropColumnFamily();
|
||||||
|
const std::string cf_to_drop_name(GetParam());
|
||||||
|
auto cfd_to_drop =
|
||||||
|
versions_->GetColumnFamilySet()->GetColumnFamily(cf_to_drop_name);
|
||||||
|
ASSERT_NE(nullptr, cfd_to_drop);
|
||||||
|
// Increase its refcount because cfd_to_drop is used later, and we need to
|
||||||
|
// prevent it from being deleted.
|
||||||
|
cfd_to_drop->Ref();
|
||||||
|
drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID());
|
||||||
|
mutex_.Lock();
|
||||||
|
s = versions_->LogAndApply(cfd_to_drop,
|
||||||
|
*cfd_to_drop->GetLatestMutableCFOptions(),
|
||||||
|
&drop_cf_edit, &mutex_);
|
||||||
|
mutex_.Unlock();
|
||||||
|
ASSERT_OK(s);
|
||||||
|
|
||||||
|
std::vector<VersionEdit> edits(kAtomicGroupSize);
|
||||||
|
uint32_t remaining = kAtomicGroupSize;
|
||||||
|
size_t i = 0;
|
||||||
|
autovector<ColumnFamilyData*> cfds;
|
||||||
|
autovector<const MutableCFOptions*> mutable_cf_options_list;
|
||||||
|
autovector<autovector<VersionEdit*>> edit_lists;
|
||||||
|
for (const auto& cf_name : non_default_cf_names) {
|
||||||
|
auto cfd = (cf_name != cf_to_drop_name)
|
||||||
|
? versions_->GetColumnFamilySet()->GetColumnFamily(cf_name)
|
||||||
|
: cfd_to_drop;
|
||||||
|
ASSERT_NE(nullptr, cfd);
|
||||||
|
cfds.push_back(cfd);
|
||||||
|
mutable_cf_options_list.emplace_back(cfd->GetLatestMutableCFOptions());
|
||||||
|
edits[i].SetColumnFamily(cfd->GetID());
|
||||||
|
edits[i].SetLogNumber(0);
|
||||||
|
edits[i].SetNextFile(2);
|
||||||
|
edits[i].MarkAtomicGroup(--remaining);
|
||||||
|
edits[i].SetLastSequence(last_seqno++);
|
||||||
|
autovector<VersionEdit*> tmp_edits;
|
||||||
|
tmp_edits.push_back(&edits[i]);
|
||||||
|
edit_lists.emplace_back(tmp_edits);
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
int called = 0;
|
||||||
|
SyncPoint::GetInstance()->DisableProcessing();
|
||||||
|
SyncPoint::GetInstance()->ClearAllCallBacks();
|
||||||
|
SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"VersionSet::ProcessManifestWrites:CheckOneAtomicGroup", [&](void* arg) {
|
||||||
|
std::vector<VersionEdit*>* tmp_edits =
|
||||||
|
reinterpret_cast<std::vector<VersionEdit*>*>(arg);
|
||||||
|
EXPECT_EQ(kAtomicGroupSize - 1, tmp_edits->size());
|
||||||
|
for (const auto e : *tmp_edits) {
|
||||||
|
bool found = false;
|
||||||
|
for (const auto& e2 : edits) {
|
||||||
|
if (&e2 == e) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ASSERT_TRUE(found);
|
||||||
|
}
|
||||||
|
++called;
|
||||||
|
});
|
||||||
|
SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
mutex_.Lock();
|
||||||
|
s = versions_->LogAndApply(cfds, mutable_cf_options_list, edit_lists,
|
||||||
|
&mutex_);
|
||||||
|
mutex_.Unlock();
|
||||||
|
ASSERT_OK(s);
|
||||||
|
ASSERT_EQ(1, called);
|
||||||
|
if (cfd_to_drop->Unref()) {
|
||||||
|
delete cfd_to_drop;
|
||||||
|
cfd_to_drop = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
AtomicGroup, VersionSetTestDropOneCF,
|
||||||
|
testing::Values(VersionSetTestBase::kColumnFamilyName1,
|
||||||
|
VersionSetTestBase::kColumnFamilyName2,
|
||||||
|
VersionSetTestBase::kColumnFamilyName3));
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
@ -144,13 +145,18 @@ struct TableFileDeletionInfo {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct FileOperationInfo {
|
struct FileOperationInfo {
|
||||||
|
using TimePoint = std::chrono::time_point<std::chrono::system_clock,
|
||||||
|
std::chrono::nanoseconds>;
|
||||||
|
|
||||||
const std::string& path;
|
const std::string& path;
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
size_t length;
|
size_t length;
|
||||||
time_t start_timestamp;
|
const TimePoint& start_timestamp;
|
||||||
time_t finish_timestamp;
|
const TimePoint& finish_timestamp;
|
||||||
Status status;
|
Status status;
|
||||||
FileOperationInfo(const std::string& _path) : path(_path) {}
|
FileOperationInfo(const std::string& _path, const TimePoint& start,
|
||||||
|
const TimePoint& finish)
|
||||||
|
: path(_path), start_timestamp(start), finish_timestamp(finish) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FlushJobInfo {
|
struct FlushJobInfo {
|
||||||
|
@ -201,8 +201,8 @@ struct PerfContext {
|
|||||||
uint64_t env_lock_file_nanos;
|
uint64_t env_lock_file_nanos;
|
||||||
uint64_t env_unlock_file_nanos;
|
uint64_t env_unlock_file_nanos;
|
||||||
uint64_t env_new_logger_nanos;
|
uint64_t env_new_logger_nanos;
|
||||||
std::map<uint32_t, PerfContextByLevel>* level_to_perf_context;
|
std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr;
|
||||||
bool per_level_perf_context_enabled;
|
bool per_level_perf_context_enabled = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Get Thread-local PerfContext object pointer
|
// Get Thread-local PerfContext object pointer
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
#define ROCKSDB_MAJOR 5
|
#define ROCKSDB_MAJOR 5
|
||||||
#define ROCKSDB_MINOR 18
|
#define ROCKSDB_MINOR 18
|
||||||
#define ROCKSDB_PATCH 0
|
#define ROCKSDB_PATCH 4
|
||||||
|
|
||||||
// Do not use these. We made the mistake of declaring macros starting with
|
// Do not use these. We made the mistake of declaring macros starting with
|
||||||
// double underscore. Now we have to live with our choice. We'll deprecate these
|
// double underscore. Now we have to live with our choice. We'll deprecate these
|
||||||
|
@ -313,17 +313,19 @@ if(NOT EXISTS ${JAVA_TEST_LIBDIR})
|
|||||||
file(MAKE_DIRECTORY mkdir ${JAVA_TEST_LIBDIR})
|
file(MAKE_DIRECTORY mkdir ${JAVA_TEST_LIBDIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (DEFINED CUSTOM_REPO_URL)
|
if (DEFINED CUSTOM_DEPS_URL)
|
||||||
set(SEARCH_REPO_URL ${CUSTOM_REPO_URL}/)
|
set(DEPS_URL ${CUSTOM_DEPS_URL}/)
|
||||||
set(CENTRAL_REPO_URL ${CUSTOM_REPO_URL}/)
|
|
||||||
else ()
|
else ()
|
||||||
set(SEARCH_REPO_URL "http://search.maven.org/remotecontent?filepath=")
|
# This is a URL for artifacts from a "fake" release on pdillinger's fork,
|
||||||
set(CENTRAL_REPO_URL "http://central.maven.org/maven2/")
|
# so as not to put binaries in git (ew). We should move to hosting these
|
||||||
|
# under the facebook account on github, or something else more reliable
|
||||||
|
# than maven.org, which has been failing frequently from Travis.
|
||||||
|
set(DEPS_URL "https://github.com/pdillinger/rocksdb/releases/download/v6.6.x-java-deps")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(NOT EXISTS ${JAVA_JUNIT_JAR})
|
if(NOT EXISTS ${JAVA_JUNIT_JAR})
|
||||||
message("Downloading ${JAVA_JUNIT_JAR}")
|
message("Downloading ${JAVA_JUNIT_JAR}")
|
||||||
file(DOWNLOAD ${SEARCH_REPO_URL}junit/junit/4.12/junit-4.12.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
file(DOWNLOAD ${DEPS_URL}/junit-4.12.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
||||||
list(GET downloadStatus 0 error_code)
|
list(GET downloadStatus 0 error_code)
|
||||||
if(NOT error_code EQUAL 0)
|
if(NOT error_code EQUAL 0)
|
||||||
message(FATAL_ERROR "Failed downloading ${JAVA_JUNIT_JAR}")
|
message(FATAL_ERROR "Failed downloading ${JAVA_JUNIT_JAR}")
|
||||||
@ -332,7 +334,7 @@ if(NOT EXISTS ${JAVA_JUNIT_JAR})
|
|||||||
endif()
|
endif()
|
||||||
if(NOT EXISTS ${JAVA_HAMCR_JAR})
|
if(NOT EXISTS ${JAVA_HAMCR_JAR})
|
||||||
message("Downloading ${JAVA_HAMCR_JAR}")
|
message("Downloading ${JAVA_HAMCR_JAR}")
|
||||||
file(DOWNLOAD ${SEARCH_REPO_URL}org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
file(DOWNLOAD ${DEPS_URL}/hamcrest-core-1.3.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
||||||
list(GET downloadStatus 0 error_code)
|
list(GET downloadStatus 0 error_code)
|
||||||
if(NOT error_code EQUAL 0)
|
if(NOT error_code EQUAL 0)
|
||||||
message(FATAL_ERROR "Failed downloading ${JAVA_HAMCR_JAR}")
|
message(FATAL_ERROR "Failed downloading ${JAVA_HAMCR_JAR}")
|
||||||
@ -341,7 +343,7 @@ if(NOT EXISTS ${JAVA_HAMCR_JAR})
|
|||||||
endif()
|
endif()
|
||||||
if(NOT EXISTS ${JAVA_MOCKITO_JAR})
|
if(NOT EXISTS ${JAVA_MOCKITO_JAR})
|
||||||
message("Downloading ${JAVA_MOCKITO_JAR}")
|
message("Downloading ${JAVA_MOCKITO_JAR}")
|
||||||
file(DOWNLOAD ${SEARCH_REPO_URL}org/mockito/mockito-all/1.10.19/mockito-all-1.10.19.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
file(DOWNLOAD ${DEPS_URL}/mockito-all-1.10.19.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
||||||
list(GET downloadStatus 0 error_code)
|
list(GET downloadStatus 0 error_code)
|
||||||
if(NOT error_code EQUAL 0)
|
if(NOT error_code EQUAL 0)
|
||||||
message(FATAL_ERROR "Failed downloading ${JAVA_MOCKITO_JAR}")
|
message(FATAL_ERROR "Failed downloading ${JAVA_MOCKITO_JAR}")
|
||||||
@ -350,7 +352,7 @@ if(NOT EXISTS ${JAVA_MOCKITO_JAR})
|
|||||||
endif()
|
endif()
|
||||||
if(NOT EXISTS ${JAVA_CGLIB_JAR})
|
if(NOT EXISTS ${JAVA_CGLIB_JAR})
|
||||||
message("Downloading ${JAVA_CGLIB_JAR}")
|
message("Downloading ${JAVA_CGLIB_JAR}")
|
||||||
file(DOWNLOAD ${SEARCH_REPO_URL}cglib/cglib/2.2.2/cglib-2.2.2.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
file(DOWNLOAD ${DEPS_URL}/cglib-2.2.2.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
||||||
list(GET downloadStatus 0 error_code)
|
list(GET downloadStatus 0 error_code)
|
||||||
if(NOT error_code EQUAL 0)
|
if(NOT error_code EQUAL 0)
|
||||||
message(FATAL_ERROR "Failed downloading ${JAVA_CGLIB_JAR}")
|
message(FATAL_ERROR "Failed downloading ${JAVA_CGLIB_JAR}")
|
||||||
@ -359,7 +361,7 @@ if(NOT EXISTS ${JAVA_CGLIB_JAR})
|
|||||||
endif()
|
endif()
|
||||||
if(NOT EXISTS ${JAVA_ASSERTJ_JAR})
|
if(NOT EXISTS ${JAVA_ASSERTJ_JAR})
|
||||||
message("Downloading ${JAVA_ASSERTJ_JAR}")
|
message("Downloading ${JAVA_ASSERTJ_JAR}")
|
||||||
file(DOWNLOAD ${CENTRAL_REPO_URL}org/assertj/assertj-core/1.7.1/assertj-core-1.7.1.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
file(DOWNLOAD ${DEPS_URL}/assertj-core-1.7.1.jar ${JAVA_TMP_JAR} STATUS downloadStatus)
|
||||||
list(GET downloadStatus 0 error_code)
|
list(GET downloadStatus 0 error_code)
|
||||||
if(NOT error_code EQUAL 0)
|
if(NOT error_code EQUAL 0)
|
||||||
message(FATAL_ERROR "Failed downloading ${JAVA_ASSERTJ_JAR}")
|
message(FATAL_ERROR "Failed downloading ${JAVA_ASSERTJ_JAR}")
|
||||||
|
@ -192,8 +192,11 @@ ifneq ($(DEBUG_LEVEL),0)
|
|||||||
JAVAC_ARGS = -Xlint:deprecation -Xlint:unchecked
|
JAVAC_ARGS = -Xlint:deprecation -Xlint:unchecked
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SEARCH_REPO_URL?=http://search.maven.org/remotecontent?filepath=
|
# This is a URL for artifacts from a "fake" release on pdillinger's fork,
|
||||||
CENTRAL_REPO_URL?=http://central.maven.org/maven2/
|
# so as not to put binaries in git (ew). We should move to hosting these
|
||||||
|
# under the facebook account on github, or something else more reliable
|
||||||
|
# than maven.org, which has been failing frequently from Travis.
|
||||||
|
DEPS_URL?=https://github.com/pdillinger/rocksdb/releases/download/v6.6.x-java-deps
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(AM_V_at)rm -rf include/*
|
$(AM_V_at)rm -rf include/*
|
||||||
@ -250,11 +253,11 @@ optimistic_transaction_sample: java
|
|||||||
|
|
||||||
resolve_test_deps:
|
resolve_test_deps:
|
||||||
test -d "$(JAVA_TEST_LIBDIR)" || mkdir -p "$(JAVA_TEST_LIBDIR)"
|
test -d "$(JAVA_TEST_LIBDIR)" || mkdir -p "$(JAVA_TEST_LIBDIR)"
|
||||||
test -s "$(JAVA_JUNIT_JAR)" || cp $(MVN_LOCAL)/junit/junit/4.12/junit-4.12.jar $(JAVA_TEST_LIBDIR) || curl -k -L -o $(JAVA_JUNIT_JAR) $(SEARCH_REPO_URL)junit/junit/4.12/junit-4.12.jar
|
test -s "$(JAVA_JUNIT_JAR)" || cp $(MVN_LOCAL)/junit/junit/4.12/junit-4.12.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output $(JAVA_JUNIT_JAR) --location $(DEPS_URL)/junit-4.12.jar
|
||||||
test -s "$(JAVA_HAMCR_JAR)" || cp $(MVN_LOCAL)/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar $(JAVA_TEST_LIBDIR) || curl -k -L -o $(JAVA_HAMCR_JAR) $(SEARCH_REPO_URL)org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar
|
test -s "$(JAVA_HAMCR_JAR)" || cp $(MVN_LOCAL)/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output $(JAVA_HAMCR_JAR) --location $(DEPS_URL)/hamcrest-core-1.3.jar
|
||||||
test -s "$(JAVA_MOCKITO_JAR)" || cp $(MVN_LOCAL)/org/mockito/mockito-all/1.10.19/mockito-all-1.10.19.jar $(JAVA_TEST_LIBDIR) || curl -k -L -o "$(JAVA_MOCKITO_JAR)" $(SEARCH_REPO_URL)org/mockito/mockito-all/1.10.19/mockito-all-1.10.19.jar
|
test -s "$(JAVA_MOCKITO_JAR)" || cp $(MVN_LOCAL)/org/mockito/mockito-all/1.10.19/mockito-all-1.10.19.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output "$(JAVA_MOCKITO_JAR)" --location $(DEPS_URL)/mockito-all-1.10.19.jar
|
||||||
test -s "$(JAVA_CGLIB_JAR)" || cp $(MVN_LOCAL)/cglib/cglib/2.2.2/cglib-2.2.2.jar $(JAVA_TEST_LIBDIR) || curl -k -L -o "$(JAVA_CGLIB_JAR)" $(SEARCH_REPO_URL)cglib/cglib/2.2.2/cglib-2.2.2.jar
|
test -s "$(JAVA_CGLIB_JAR)" || cp $(MVN_LOCAL)/cglib/cglib/2.2.2/cglib-2.2.2.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output "$(JAVA_CGLIB_JAR)" --location $(DEPS_URL)/cglib-2.2.2.jar
|
||||||
test -s "$(JAVA_ASSERTJ_JAR)" || cp $(MVN_LOCAL)/org/assertj/assertj-core/1.7.1/assertj-core-1.7.1.jar $(JAVA_TEST_LIBDIR) || curl -k -L -o "$(JAVA_ASSERTJ_JAR)" $(CENTRAL_REPO_URL)org/assertj/assertj-core/1.7.1/assertj-core-1.7.1.jar
|
test -s "$(JAVA_ASSERTJ_JAR)" || cp $(MVN_LOCAL)/org/assertj/assertj-core/1.7.1/assertj-core-1.7.1.jar $(JAVA_TEST_LIBDIR) || curl --fail --insecure --output "$(JAVA_ASSERTJ_JAR)" --location $(DEPS_URL)/assertj-core-1.7.1.jar
|
||||||
|
|
||||||
java_test: java resolve_test_deps
|
java_test: java resolve_test_deps
|
||||||
$(AM_V_GEN)mkdir -p $(TEST_CLASSES)
|
$(AM_V_GEN)mkdir -p $(TEST_CLASSES)
|
||||||
|
@ -8,11 +8,21 @@ cd /rocksdb-local
|
|||||||
|
|
||||||
# Use scl devtoolset if available (i.e. CentOS <7)
|
# Use scl devtoolset if available (i.e. CentOS <7)
|
||||||
if hash scl 2>/dev/null; then
|
if hash scl 2>/dev/null; then
|
||||||
|
if scl --list | grep -q 'devtoolset-7'; then
|
||||||
|
scl enable devtoolset-7 'make jclean clean'
|
||||||
|
scl enable devtoolset-7 'PORTABLE=1 make -j2 rocksdbjavastatic'
|
||||||
|
|
||||||
|
elif scl --list | grep -q 'devtoolset-2'; then
|
||||||
scl enable devtoolset-2 'make jclean clean'
|
scl enable devtoolset-2 'make jclean clean'
|
||||||
scl enable devtoolset-2 'PORTABLE=1 make -j8 rocksdbjavastatic'
|
scl enable devtoolset-2 'PORTABLE=1 make -j2 rocksdbjavastatic'
|
||||||
|
|
||||||
|
else
|
||||||
|
echo "Could not find devtoolset"
|
||||||
|
exit 1;
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
make jclean clean
|
make jclean clean
|
||||||
PORTABLE=1 make -j8 rocksdbjavastatic
|
PORTABLE=1 make -j2 rocksdbjavastatic
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cp java/target/librocksdbjni-linux*.so java/target/rocksdbjni-*-linux*.jar /rocksdb-host/java/target
|
cp java/target/librocksdbjni-linux*.so java/target/rocksdbjni-*-linux*.jar /rocksdb-host/java/target
|
||||||
|
@ -131,7 +131,6 @@ void LoggerJniCallback::Logv(const InfoLogLevel log_level, const char* format,
|
|||||||
}
|
}
|
||||||
|
|
||||||
assert(format != nullptr);
|
assert(format != nullptr);
|
||||||
assert(ap != nullptr);
|
|
||||||
const std::unique_ptr<char[]> msg = format_str(format, ap);
|
const std::unique_ptr<char[]> msg = format_str(format, ap);
|
||||||
|
|
||||||
// pass msg to java callback handler
|
// pass msg to java callback handler
|
||||||
|
@ -54,9 +54,9 @@ public enum MemoryUsageType {
|
|||||||
* cannot be found
|
* cannot be found
|
||||||
*/
|
*/
|
||||||
public static MemoryUsageType getMemoryUsageType(final byte byteIdentifier) {
|
public static MemoryUsageType getMemoryUsageType(final byte byteIdentifier) {
|
||||||
for (final MemoryUsageType MemoryUsageType : MemoryUsageType.values()) {
|
for (final MemoryUsageType memoryUsageType : MemoryUsageType.values()) {
|
||||||
if (MemoryUsageType.getValue() == byteIdentifier) {
|
if (memoryUsageType.getValue() == byteIdentifier) {
|
||||||
return MemoryUsageType;
|
return memoryUsageType;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -64,7 +64,7 @@ public enum MemoryUsageType {
|
|||||||
"Illegal value provided for MemoryUsageType.");
|
"Illegal value provided for MemoryUsageType.");
|
||||||
}
|
}
|
||||||
|
|
||||||
private MemoryUsageType(byte value) {
|
MemoryUsageType(byte value) {
|
||||||
value_ = value;
|
value_ = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,6 +4,10 @@ public class Environment {
|
|||||||
private static String OS = System.getProperty("os.name").toLowerCase();
|
private static String OS = System.getProperty("os.name").toLowerCase();
|
||||||
private static String ARCH = System.getProperty("os.arch").toLowerCase();
|
private static String ARCH = System.getProperty("os.arch").toLowerCase();
|
||||||
|
|
||||||
|
public static boolean isAarch64() {
|
||||||
|
return ARCH.contains("aarch64");
|
||||||
|
}
|
||||||
|
|
||||||
public static boolean isPowerPC() {
|
public static boolean isPowerPC() {
|
||||||
return ARCH.contains("ppc");
|
return ARCH.contains("ppc");
|
||||||
}
|
}
|
||||||
@ -59,7 +63,7 @@ public class Environment {
|
|||||||
public static String getJniLibraryName(final String name) {
|
public static String getJniLibraryName(final String name) {
|
||||||
if (isUnix()) {
|
if (isUnix()) {
|
||||||
final String arch = is64Bit() ? "64" : "32";
|
final String arch = is64Bit() ? "64" : "32";
|
||||||
if(isPowerPC()) {
|
if(isPowerPC() || isAarch64()) {
|
||||||
return String.format("%sjni-linux-%s", name, ARCH);
|
return String.format("%sjni-linux-%s", name, ARCH);
|
||||||
} else if(isS390x()) {
|
} else if(isS390x()) {
|
||||||
return String.format("%sjni-linux%s", name, ARCH);
|
return String.format("%sjni-linux%s", name, ARCH);
|
||||||
|
@ -46,13 +46,13 @@ public class MergeTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private byte[] longToByteArray(long l) {
|
private byte[] longToByteArray(long l) {
|
||||||
ByteBuffer buf = ByteBuffer.allocate(Long.BYTES);
|
ByteBuffer buf = ByteBuffer.allocate(Long.SIZE / Byte.SIZE);
|
||||||
buf.putLong(l);
|
buf.putLong(l);
|
||||||
return buf.array();
|
return buf.array();
|
||||||
}
|
}
|
||||||
|
|
||||||
private long longFromByteArray(byte[] a) {
|
private long longFromByteArray(byte[] a) {
|
||||||
ByteBuffer buf = ByteBuffer.allocate(Long.BYTES);
|
ByteBuffer buf = ByteBuffer.allocate(Long.SIZE / Byte.SIZE);
|
||||||
buf.put(a);
|
buf.put(a);
|
||||||
buf.flip();
|
buf.flip();
|
||||||
return buf.getLong();
|
return buf.getLong();
|
||||||
|
@ -130,6 +130,24 @@ public class EnvironmentTest {
|
|||||||
isEqualTo("librocksdbjni.dll");
|
isEqualTo("librocksdbjni.dll");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void aarch64() {
|
||||||
|
setEnvironmentClassFields("Linux", "aarch64");
|
||||||
|
assertThat(Environment.isUnix()).isTrue();
|
||||||
|
assertThat(Environment.isAarch64()).isTrue();
|
||||||
|
assertThat(Environment.is64Bit()).isTrue();
|
||||||
|
assertThat(Environment.getJniLibraryExtension()).
|
||||||
|
isEqualTo(".so");
|
||||||
|
assertThat(Environment.getSharedLibraryName("rocksdb")).
|
||||||
|
isEqualTo("rocksdbjni");
|
||||||
|
assertThat(Environment.getJniLibraryName("rocksdb")).
|
||||||
|
isEqualTo("rocksdbjni-linux-aarch64");
|
||||||
|
assertThat(Environment.getJniLibraryFileName("rocksdb")).
|
||||||
|
isEqualTo("librocksdbjni-linux-aarch64.so");
|
||||||
|
assertThat(Environment.getSharedLibraryFileName("rocksdb")).
|
||||||
|
isEqualTo("librocksdbjni.so");
|
||||||
|
}
|
||||||
|
|
||||||
private void setEnvironmentClassFields(String osName,
|
private void setEnvironmentClassFields(String osName,
|
||||||
String osArch) {
|
String osArch) {
|
||||||
setEnvironmentClassField(OS_FIELD_NAME, osName);
|
setEnvironmentClassField(OS_FIELD_NAME, osName);
|
||||||
|
53
port/jemalloc_helper.h
Normal file
53
port/jemalloc_helper.h
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef ROCKSDB_JEMALLOC
|
||||||
|
#ifdef __FreeBSD__
|
||||||
|
#include <malloc_np.h>
|
||||||
|
#else
|
||||||
|
#include <jemalloc/jemalloc.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef JEMALLOC_CXX_THROW
|
||||||
|
#define JEMALLOC_CXX_THROW
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Declare non-standard jemalloc APIs as weak symbols. We can null-check these
|
||||||
|
// symbols to detect whether jemalloc is linked with the binary.
|
||||||
|
extern "C" void* mallocx(size_t, int) __attribute__((__weak__));
|
||||||
|
extern "C" void* rallocx(void*, size_t, int) __attribute__((__weak__));
|
||||||
|
extern "C" size_t xallocx(void*, size_t, size_t, int) __attribute__((__weak__));
|
||||||
|
extern "C" size_t sallocx(const void*, int) __attribute__((__weak__));
|
||||||
|
extern "C" void dallocx(void*, int) __attribute__((__weak__));
|
||||||
|
extern "C" void sdallocx(void*, size_t, int) __attribute__((__weak__));
|
||||||
|
extern "C" size_t nallocx(size_t, int) __attribute__((__weak__));
|
||||||
|
extern "C" int mallctl(const char*, void*, size_t*, void*, size_t)
|
||||||
|
__attribute__((__weak__));
|
||||||
|
extern "C" int mallctlnametomib(const char*, size_t*, size_t*)
|
||||||
|
__attribute__((__weak__));
|
||||||
|
extern "C" int mallctlbymib(const size_t*, size_t, void*, size_t*, void*,
|
||||||
|
size_t) __attribute__((__weak__));
|
||||||
|
extern "C" void malloc_stats_print(void (*)(void*, const char*), void*,
|
||||||
|
const char*) __attribute__((__weak__));
|
||||||
|
extern "C" size_t malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void*)
|
||||||
|
JEMALLOC_CXX_THROW __attribute__((__weak__));
|
||||||
|
|
||||||
|
// Check if Jemalloc is linked with the binary. Note the main program might be
|
||||||
|
// using a different memory allocator even this method return true.
|
||||||
|
// It is loosely based on folly::usingJEMalloc(), minus the check that actually
|
||||||
|
// allocate memory and see if it is through jemalloc, to handle the dlopen()
|
||||||
|
// case:
|
||||||
|
// https://github.com/facebook/folly/blob/76cf8b5841fb33137cfbf8b224f0226437c855bc/folly/memory/Malloc.h#L147
|
||||||
|
static inline bool HasJemalloc() {
|
||||||
|
return mallocx != nullptr && rallocx != nullptr && xallocx != nullptr &&
|
||||||
|
sallocx != nullptr && dallocx != nullptr && sdallocx != nullptr &&
|
||||||
|
nallocx != nullptr && mallctl != nullptr &&
|
||||||
|
mallctlnametomib != nullptr && mallctlbymib != nullptr &&
|
||||||
|
malloc_stats_print != nullptr && malloc_usable_size != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // ROCKSDB_JEMALLOC
|
7
src.mk
7
src.mk
@ -44,7 +44,6 @@ LIB_SOURCES = \
|
|||||||
db/merge_helper.cc \
|
db/merge_helper.cc \
|
||||||
db/merge_operator.cc \
|
db/merge_operator.cc \
|
||||||
db/range_del_aggregator.cc \
|
db/range_del_aggregator.cc \
|
||||||
db/range_del_aggregator_v2.cc \
|
|
||||||
db/range_tombstone_fragmenter.cc \
|
db/range_tombstone_fragmenter.cc \
|
||||||
db/repair.cc \
|
db/repair.cc \
|
||||||
db/snapshot_impl.cc \
|
db/snapshot_impl.cc \
|
||||||
@ -223,6 +222,11 @@ LIB_SOURCES = \
|
|||||||
utilities/write_batch_with_index/write_batch_with_index.cc \
|
utilities/write_batch_with_index/write_batch_with_index.cc \
|
||||||
utilities/write_batch_with_index/write_batch_with_index_internal.cc \
|
utilities/write_batch_with_index/write_batch_with_index_internal.cc \
|
||||||
|
|
||||||
|
ifeq ($(ARMCRC_SOURCE),1)
|
||||||
|
LIB_SOURCES +=\
|
||||||
|
util/crc32c_arm64.cc
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq (,$(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1))
|
ifeq (,$(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1))
|
||||||
LIB_SOURCES_ASM =\
|
LIB_SOURCES_ASM =\
|
||||||
util/crc32c_ppc_asm.S
|
util/crc32c_ppc_asm.S
|
||||||
@ -334,7 +338,6 @@ MAIN_SOURCES = \
|
|||||||
db/repair_test.cc \
|
db/repair_test.cc \
|
||||||
db/range_del_aggregator_test.cc \
|
db/range_del_aggregator_test.cc \
|
||||||
db/range_del_aggregator_bench.cc \
|
db/range_del_aggregator_bench.cc \
|
||||||
db/range_del_aggregator_v2_test.cc \
|
|
||||||
db/range_tombstone_fragmenter_test.cc \
|
db/range_tombstone_fragmenter_test.cc \
|
||||||
db/table_properties_collector_test.cc \
|
db/table_properties_collector_test.cc \
|
||||||
db/util_merge_operators_test.cc \
|
db/util_merge_operators_test.cc \
|
||||||
|
@ -2348,7 +2348,7 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
|
|||||||
snapshot = read_options.snapshot->GetSequenceNumber();
|
snapshot = read_options.snapshot->GetSequenceNumber();
|
||||||
}
|
}
|
||||||
return new FragmentedRangeTombstoneIterator(
|
return new FragmentedRangeTombstoneIterator(
|
||||||
rep_->fragmented_range_dels, snapshot, rep_->internal_comparator);
|
rep_->fragmented_range_dels, rep_->internal_comparator, snapshot);
|
||||||
}
|
}
|
||||||
|
|
||||||
InternalIterator* BlockBasedTable::NewUnfragmentedRangeTombstoneIterator(
|
InternalIterator* BlockBasedTable::NewUnfragmentedRangeTombstoneIterator(
|
||||||
|
@ -18,6 +18,8 @@
|
|||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/util.h"
|
#include "util/util.h"
|
||||||
|
|
||||||
|
#include "util/crc32c_arm64.h"
|
||||||
|
|
||||||
#ifdef __powerpc64__
|
#ifdef __powerpc64__
|
||||||
#include "util/crc32c_ppc.h"
|
#include "util/crc32c_ppc.h"
|
||||||
#include "util/crc32c_ppc_constants.h"
|
#include "util/crc32c_ppc_constants.h"
|
||||||
@ -396,6 +398,8 @@ uint32_t ExtendImpl(uint32_t crc, const char* buf, size_t size) {
|
|||||||
return static_cast<uint32_t>(l ^ 0xffffffffu);
|
return static_cast<uint32_t>(l ^ 0xffffffffu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Detect if ARM64 CRC or not.
|
||||||
|
#ifndef HAVE_ARM64_CRC
|
||||||
// Detect if SS42 or not.
|
// Detect if SS42 or not.
|
||||||
#ifndef HAVE_POWER8
|
#ifndef HAVE_POWER8
|
||||||
|
|
||||||
@ -434,6 +438,7 @@ static bool isPCLMULQDQ() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif // HAVE_POWER8
|
#endif // HAVE_POWER8
|
||||||
|
#endif // HAVE_ARM64_CRC
|
||||||
|
|
||||||
typedef uint32_t (*Function)(uint32_t, const char*, size_t);
|
typedef uint32_t (*Function)(uint32_t, const char*, size_t);
|
||||||
|
|
||||||
@ -463,6 +468,11 @@ static bool isAltiVec() {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__linux__) && defined(HAVE_ARM64_CRC)
|
||||||
|
uint32_t ExtendARMImpl(uint32_t crc, const char *buf, size_t size) {
|
||||||
|
return crc32c_arm64(crc, (const unsigned char *)buf, size);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
std::string IsFastCrc32Supported() {
|
std::string IsFastCrc32Supported() {
|
||||||
bool has_fast_crc = false;
|
bool has_fast_crc = false;
|
||||||
@ -478,6 +488,14 @@ std::string IsFastCrc32Supported() {
|
|||||||
has_fast_crc = false;
|
has_fast_crc = false;
|
||||||
arch = "PPC";
|
arch = "PPC";
|
||||||
#endif
|
#endif
|
||||||
|
#elif defined(__linux__) && defined(HAVE_ARM64_CRC)
|
||||||
|
if (crc32c_runtime_check()) {
|
||||||
|
has_fast_crc = true;
|
||||||
|
arch = "Arm64";
|
||||||
|
} else {
|
||||||
|
has_fast_crc = false;
|
||||||
|
arch = "Arm64";
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
has_fast_crc = isSSE42();
|
has_fast_crc = isSSE42();
|
||||||
arch = "x86";
|
arch = "x86";
|
||||||
@ -1200,7 +1218,15 @@ uint32_t crc32c_3way(uint32_t crc, const char* buf, size_t len) {
|
|||||||
#endif //HAVE_SSE42 && HAVE_PCLMUL
|
#endif //HAVE_SSE42 && HAVE_PCLMUL
|
||||||
|
|
||||||
static inline Function Choose_Extend() {
|
static inline Function Choose_Extend() {
|
||||||
#ifndef HAVE_POWER8
|
#ifdef HAVE_POWER8
|
||||||
|
return isAltiVec() ? ExtendPPCImpl : ExtendImpl<Slow_CRC32>;
|
||||||
|
#elif defined(__linux__) && defined(HAVE_ARM64_CRC)
|
||||||
|
if(crc32c_runtime_check()) {
|
||||||
|
return ExtendARMImpl;
|
||||||
|
} else {
|
||||||
|
return ExtendImpl<Slow_CRC32>;
|
||||||
|
}
|
||||||
|
#else
|
||||||
if (isSSE42()) {
|
if (isSSE42()) {
|
||||||
if (isPCLMULQDQ()) {
|
if (isPCLMULQDQ()) {
|
||||||
#if defined HAVE_SSE42 && defined HAVE_PCLMUL && !defined NO_THREEWAY_CRC32C
|
#if defined HAVE_SSE42 && defined HAVE_PCLMUL && !defined NO_THREEWAY_CRC32C
|
||||||
@ -1216,8 +1242,6 @@ static inline Function Choose_Extend() {
|
|||||||
else {
|
else {
|
||||||
return ExtendImpl<Slow_CRC32>;
|
return ExtendImpl<Slow_CRC32>;
|
||||||
}
|
}
|
||||||
#else //HAVE_POWER8
|
|
||||||
return isAltiVec() ? ExtendPPCImpl : ExtendImpl<Slow_CRC32>;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
56
util/crc32c_arm64.cc
Normal file
56
util/crc32c_arm64.cc
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#include "util/crc32c_arm64.h"
|
||||||
|
|
||||||
|
#if defined(__linux__) && defined(HAVE_ARM64_CRC)
|
||||||
|
|
||||||
|
#include <asm/hwcap.h>
|
||||||
|
#include <sys/auxv.h>
|
||||||
|
#ifndef HWCAP_CRC32
|
||||||
|
#define HWCAP_CRC32 (1 << 7)
|
||||||
|
#endif
|
||||||
|
uint32_t crc32c_runtime_check(void) {
|
||||||
|
uint64_t auxv = getauxval(AT_HWCAP);
|
||||||
|
return (auxv & HWCAP_CRC32) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data,
|
||||||
|
unsigned len) {
|
||||||
|
const uint8_t *buf1;
|
||||||
|
const uint16_t *buf2;
|
||||||
|
const uint32_t *buf4;
|
||||||
|
const uint64_t *buf8;
|
||||||
|
|
||||||
|
int64_t length = (int64_t)len;
|
||||||
|
|
||||||
|
crc ^= 0xffffffff;
|
||||||
|
buf8 = (const uint64_t *)data;
|
||||||
|
while ((length -= sizeof(uint64_t)) >= 0) {
|
||||||
|
crc = __crc32cd(crc, *buf8++);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The following is more efficient than the straight loop */
|
||||||
|
buf4 = (const uint32_t *)buf8;
|
||||||
|
if (length & sizeof(uint32_t)) {
|
||||||
|
crc = __crc32cw(crc, *buf4++);
|
||||||
|
length -= 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf2 = (const uint16_t *)buf4;
|
||||||
|
if (length & sizeof(uint16_t)) {
|
||||||
|
crc = __crc32ch(crc, *buf2++);
|
||||||
|
length -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf1 = (const uint8_t *)buf2;
|
||||||
|
if (length & sizeof(uint8_t))
|
||||||
|
crc = __crc32cb(crc, *buf1);
|
||||||
|
|
||||||
|
crc ^= 0xffffffff;
|
||||||
|
return crc;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
21
util/crc32c_arm64.h
Normal file
21
util/crc32c_arm64.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#ifndef UTIL_CRC32C_ARM64_H
|
||||||
|
#define UTIL_CRC32C_ARM64_H
|
||||||
|
|
||||||
|
#include <inttypes.h>
|
||||||
|
|
||||||
|
#if defined(__aarch64__) || defined(__AARCH64__)
|
||||||
|
#ifdef __ARM_FEATURE_CRC32
|
||||||
|
#define HAVE_ARM64_CRC
|
||||||
|
#include <arm_acle.h>
|
||||||
|
extern uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data, unsigned len);
|
||||||
|
extern uint32_t crc32c_runtime_check(void);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -52,11 +52,12 @@ DeleteScheduler::~DeleteScheduler() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
Status DeleteScheduler::DeleteFile(const std::string& file_path,
|
||||||
const std::string& dir_to_sync) {
|
const std::string& dir_to_sync,
|
||||||
|
const bool force_bg) {
|
||||||
Status s;
|
Status s;
|
||||||
if (rate_bytes_per_sec_.load() <= 0 ||
|
if (rate_bytes_per_sec_.load() <= 0 || (!force_bg &&
|
||||||
total_trash_size_.load() >
|
total_trash_size_.load() >
|
||||||
sst_file_manager_->GetTotalSize() * max_trash_db_ratio_.load()) {
|
sst_file_manager_->GetTotalSize() * max_trash_db_ratio_.load())) {
|
||||||
// Rate limiting is disabled or trash size makes up more than
|
// Rate limiting is disabled or trash size makes up more than
|
||||||
// max_trash_db_ratio_ (default 25%) of the total DB size
|
// max_trash_db_ratio_ (default 25%) of the total DB size
|
||||||
TEST_SYNC_POINT("DeleteScheduler::DeleteFile");
|
TEST_SYNC_POINT("DeleteScheduler::DeleteFile");
|
||||||
|
@ -46,8 +46,11 @@ class DeleteScheduler {
|
|||||||
rate_bytes_per_sec_.store(bytes_per_sec);
|
rate_bytes_per_sec_.store(bytes_per_sec);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark file as trash directory and schedule it's deletion
|
// Mark file as trash directory and schedule it's deletion. If force_bg is
|
||||||
Status DeleteFile(const std::string& fname, const std::string& dir_to_sync);
|
// set, it forces the file to always be deleted in the background thread,
|
||||||
|
// except when rate limiting is disabled
|
||||||
|
Status DeleteFile(const std::string& fname, const std::string& dir_to_sync,
|
||||||
|
const bool force_bg = false);
|
||||||
|
|
||||||
// Wait for all files being deleteing in the background to finish or for
|
// Wait for all files being deleteing in the background to finish or for
|
||||||
// destructor to be called.
|
// destructor to be called.
|
||||||
|
@ -99,17 +99,18 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
|
|||||||
}
|
}
|
||||||
Slice tmp;
|
Slice tmp;
|
||||||
|
|
||||||
time_t start_ts = 0;
|
FileOperationInfo::TimePoint start_ts;
|
||||||
uint64_t orig_offset = 0;
|
uint64_t orig_offset = 0;
|
||||||
if (ShouldNotifyListeners()) {
|
if (ShouldNotifyListeners()) {
|
||||||
start_ts = std::chrono::system_clock::to_time_t(
|
start_ts = std::chrono::system_clock::now();
|
||||||
std::chrono::system_clock::now());
|
|
||||||
orig_offset = aligned_offset + buf.CurrentSize();
|
orig_offset = aligned_offset + buf.CurrentSize();
|
||||||
}
|
}
|
||||||
s = file_->Read(aligned_offset + buf.CurrentSize(), allowed, &tmp,
|
s = file_->Read(aligned_offset + buf.CurrentSize(), allowed, &tmp,
|
||||||
buf.Destination());
|
buf.Destination());
|
||||||
if (ShouldNotifyListeners()) {
|
if (ShouldNotifyListeners()) {
|
||||||
NotifyOnFileReadFinish(orig_offset, tmp.size(), start_ts, s);
|
auto finish_ts = std::chrono::system_clock::now();
|
||||||
|
NotifyOnFileReadFinish(orig_offset, tmp.size(), start_ts, finish_ts,
|
||||||
|
s);
|
||||||
}
|
}
|
||||||
|
|
||||||
buf.Size(buf.CurrentSize() + tmp.size());
|
buf.Size(buf.CurrentSize() + tmp.size());
|
||||||
@ -145,16 +146,17 @@ Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
|
|||||||
Slice tmp_result;
|
Slice tmp_result;
|
||||||
|
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
time_t start_ts = 0;
|
FileOperationInfo::TimePoint start_ts;
|
||||||
if (ShouldNotifyListeners()) {
|
if (ShouldNotifyListeners()) {
|
||||||
start_ts = std::chrono::system_clock::to_time_t(
|
start_ts = std::chrono::system_clock::now();
|
||||||
std::chrono::system_clock::now());
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
s = file_->Read(offset + pos, allowed, &tmp_result, scratch + pos);
|
s = file_->Read(offset + pos, allowed, &tmp_result, scratch + pos);
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
if (ShouldNotifyListeners()) {
|
if (ShouldNotifyListeners()) {
|
||||||
NotifyOnFileReadFinish(offset + pos, tmp_result.size(), start_ts, s);
|
auto finish_ts = std::chrono::system_clock::now();
|
||||||
|
NotifyOnFileReadFinish(offset + pos, tmp_result.size(), start_ts,
|
||||||
|
finish_ts, s);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -442,18 +444,18 @@ Status WritableFileWriter::WriteBuffered(const char* data, size_t size) {
|
|||||||
TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend");
|
TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend");
|
||||||
|
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
time_t start_ts = 0;
|
FileOperationInfo::TimePoint start_ts;
|
||||||
uint64_t old_size = writable_file_->GetFileSize();
|
uint64_t old_size = writable_file_->GetFileSize();
|
||||||
if (ShouldNotifyListeners()) {
|
if (ShouldNotifyListeners()) {
|
||||||
start_ts = std::chrono::system_clock::to_time_t(
|
start_ts = std::chrono::system_clock::now();
|
||||||
std::chrono::system_clock::now());
|
|
||||||
old_size = next_write_offset_;
|
old_size = next_write_offset_;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
s = writable_file_->Append(Slice(src, allowed));
|
s = writable_file_->Append(Slice(src, allowed));
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
if (ShouldNotifyListeners()) {
|
if (ShouldNotifyListeners()) {
|
||||||
NotifyOnFileWriteFinish(old_size, allowed, start_ts, s);
|
auto finish_ts = std::chrono::system_clock::now();
|
||||||
|
NotifyOnFileWriteFinish(old_size, allowed, start_ts, finish_ts, s);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
@ -518,15 +520,15 @@ Status WritableFileWriter::WriteDirect() {
|
|||||||
{
|
{
|
||||||
IOSTATS_TIMER_GUARD(write_nanos);
|
IOSTATS_TIMER_GUARD(write_nanos);
|
||||||
TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend");
|
TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend");
|
||||||
time_t start_ts(0);
|
FileOperationInfo::TimePoint start_ts;
|
||||||
if (ShouldNotifyListeners()) {
|
if (ShouldNotifyListeners()) {
|
||||||
start_ts = std::chrono::system_clock::to_time_t(
|
start_ts = std::chrono::system_clock::now();
|
||||||
std::chrono::system_clock::now());
|
|
||||||
}
|
}
|
||||||
// direct writes must be positional
|
// direct writes must be positional
|
||||||
s = writable_file_->PositionedAppend(Slice(src, size), write_offset);
|
s = writable_file_->PositionedAppend(Slice(src, size), write_offset);
|
||||||
if (ShouldNotifyListeners()) {
|
if (ShouldNotifyListeners()) {
|
||||||
NotifyOnFileWriteFinish(write_offset, size, start_ts, s);
|
auto finish_ts = std::chrono::system_clock::now();
|
||||||
|
NotifyOnFileWriteFinish(write_offset, size, start_ts, finish_ts, s);
|
||||||
}
|
}
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
buf_.Size(file_advance + leftover_tail);
|
buf_.Size(file_advance + leftover_tail);
|
||||||
|
@ -64,15 +64,13 @@ class SequentialFileReader {
|
|||||||
class RandomAccessFileReader {
|
class RandomAccessFileReader {
|
||||||
private:
|
private:
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
void NotifyOnFileReadFinish(uint64_t offset, size_t length, time_t start_ts,
|
void NotifyOnFileReadFinish(uint64_t offset, size_t length,
|
||||||
|
const FileOperationInfo::TimePoint& start_ts,
|
||||||
|
const FileOperationInfo::TimePoint& finish_ts,
|
||||||
const Status& status) const {
|
const Status& status) const {
|
||||||
FileOperationInfo info(file_name_);
|
FileOperationInfo info(file_name_, start_ts, finish_ts);
|
||||||
info.offset = offset;
|
info.offset = offset;
|
||||||
info.length = length;
|
info.length = length;
|
||||||
info.start_timestamp = start_ts;
|
|
||||||
time_t finish_ts =
|
|
||||||
std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
|
|
||||||
info.finish_timestamp = finish_ts;
|
|
||||||
info.status = status;
|
info.status = status;
|
||||||
|
|
||||||
for (auto& listener : listeners_) {
|
for (auto& listener : listeners_) {
|
||||||
@ -157,15 +155,13 @@ class RandomAccessFileReader {
|
|||||||
class WritableFileWriter {
|
class WritableFileWriter {
|
||||||
private:
|
private:
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
void NotifyOnFileWriteFinish(uint64_t offset, size_t length, time_t start_ts,
|
void NotifyOnFileWriteFinish(uint64_t offset, size_t length,
|
||||||
|
const FileOperationInfo::TimePoint& start_ts,
|
||||||
|
const FileOperationInfo::TimePoint& finish_ts,
|
||||||
const Status& status) {
|
const Status& status) {
|
||||||
FileOperationInfo info(file_name_);
|
FileOperationInfo info(file_name_, start_ts, finish_ts);
|
||||||
info.offset = offset;
|
info.offset = offset;
|
||||||
info.length = length;
|
info.length = length;
|
||||||
info.start_timestamp = start_ts;
|
|
||||||
time_t finish_ts =
|
|
||||||
std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
|
|
||||||
info.finish_timestamp = finish_ts;
|
|
||||||
info.status = status;
|
info.status = status;
|
||||||
|
|
||||||
for (auto& listener : listeners_) {
|
for (auto& listener : listeners_) {
|
||||||
|
@ -89,16 +89,23 @@ Status CreateFile(Env* env, const std::string& destination,
|
|||||||
|
|
||||||
Status DeleteSSTFile(const ImmutableDBOptions* db_options,
|
Status DeleteSSTFile(const ImmutableDBOptions* db_options,
|
||||||
const std::string& fname, const std::string& dir_to_sync) {
|
const std::string& fname, const std::string& dir_to_sync) {
|
||||||
|
return DeleteDBFile(db_options, fname, dir_to_sync, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
||||||
|
const std::string& fname, const std::string& dir_to_sync,
|
||||||
|
const bool force_bg) {
|
||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
auto sfm =
|
auto sfm =
|
||||||
static_cast<SstFileManagerImpl*>(db_options->sst_file_manager.get());
|
static_cast<SstFileManagerImpl*>(db_options->sst_file_manager.get());
|
||||||
if (sfm) {
|
if (sfm) {
|
||||||
return sfm->ScheduleFileDeletion(fname, dir_to_sync);
|
return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg);
|
||||||
} else {
|
} else {
|
||||||
return db_options->env->DeleteFile(fname);
|
return db_options->env->DeleteFile(fname);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
(void)dir_to_sync;
|
(void)dir_to_sync;
|
||||||
|
(void)force_bg;
|
||||||
// SstFileManager is not supported in ROCKSDB_LITE
|
// SstFileManager is not supported in ROCKSDB_LITE
|
||||||
return db_options->env->DeleteFile(fname);
|
return db_options->env->DeleteFile(fname);
|
||||||
#endif
|
#endif
|
||||||
|
@ -25,4 +25,9 @@ extern Status DeleteSSTFile(const ImmutableDBOptions* db_options,
|
|||||||
const std::string& fname,
|
const std::string& fname,
|
||||||
const std::string& path_to_sync);
|
const std::string& path_to_sync);
|
||||||
|
|
||||||
|
extern Status DeleteDBFile(const ImmutableDBOptions* db_options,
|
||||||
|
const std::string& fname,
|
||||||
|
const std::string& path_to_sync,
|
||||||
|
const bool force_bg);
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -92,9 +92,7 @@ class BinaryHeap {
|
|||||||
reset_root_cmp_cache();
|
reset_root_cmp_cache();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool empty() const {
|
bool empty() const { return data_.empty(); }
|
||||||
return data_.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t size() const { return data_.size(); }
|
size_t size() const { return data_.size(); }
|
||||||
|
|
||||||
|
@ -133,12 +133,16 @@ Status NewJemallocNodumpAllocator(
|
|||||||
JemallocAllocatorOptions& options,
|
JemallocAllocatorOptions& options,
|
||||||
std::shared_ptr<MemoryAllocator>* memory_allocator) {
|
std::shared_ptr<MemoryAllocator>* memory_allocator) {
|
||||||
*memory_allocator = nullptr;
|
*memory_allocator = nullptr;
|
||||||
#ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
Status unsupported = Status::NotSupported(
|
||||||
(void) options;
|
|
||||||
return Status::NotSupported(
|
|
||||||
"JemallocNodumpAllocator only available with jemalloc version >= 5 "
|
"JemallocNodumpAllocator only available with jemalloc version >= 5 "
|
||||||
"and MADV_DONTDUMP is available.");
|
"and MADV_DONTDUMP is available.");
|
||||||
|
#ifndef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR
|
||||||
|
(void)options;
|
||||||
|
return unsupported;
|
||||||
#else
|
#else
|
||||||
|
if (!HasJemalloc()) {
|
||||||
|
return unsupported;
|
||||||
|
}
|
||||||
if (memory_allocator == nullptr) {
|
if (memory_allocator == nullptr) {
|
||||||
return Status::InvalidArgument("memory_allocator must be non-null.");
|
return Status::InvalidArgument("memory_allocator must be non-null.");
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "port/jemalloc_helper.h"
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
#include "rocksdb/memory_allocator.h"
|
#include "rocksdb/memory_allocator.h"
|
||||||
#include "util/core_local.h"
|
#include "util/core_local.h"
|
||||||
@ -15,7 +16,6 @@
|
|||||||
|
|
||||||
#if defined(ROCKSDB_JEMALLOC) && defined(ROCKSDB_PLATFORM_POSIX)
|
#if defined(ROCKSDB_JEMALLOC) && defined(ROCKSDB_PLATFORM_POSIX)
|
||||||
|
|
||||||
#include <jemalloc/jemalloc.h>
|
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
|
|
||||||
#if (JEMALLOC_VERSION_MAJOR >= 5) && defined(MADV_DONTDUMP)
|
#if (JEMALLOC_VERSION_MAJOR >= 5) && defined(MADV_DONTDUMP)
|
||||||
|
@ -402,8 +402,11 @@ bool SstFileManagerImpl::CancelErrorRecovery(ErrorHandler* handler) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status SstFileManagerImpl::ScheduleFileDeletion(
|
Status SstFileManagerImpl::ScheduleFileDeletion(
|
||||||
const std::string& file_path, const std::string& path_to_sync) {
|
const std::string& file_path, const std::string& path_to_sync,
|
||||||
return delete_scheduler_.DeleteFile(file_path, path_to_sync);
|
const bool force_bg) {
|
||||||
|
TEST_SYNC_POINT("SstFileManagerImpl::ScheduleFileDeletion");
|
||||||
|
return delete_scheduler_.DeleteFile(file_path, path_to_sync,
|
||||||
|
force_bg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SstFileManagerImpl::WaitForEmptyTrash() {
|
void SstFileManagerImpl::WaitForEmptyTrash() {
|
||||||
|
@ -111,9 +111,12 @@ class SstFileManagerImpl : public SstFileManager {
|
|||||||
// not guaranteed
|
// not guaranteed
|
||||||
bool CancelErrorRecovery(ErrorHandler* db);
|
bool CancelErrorRecovery(ErrorHandler* db);
|
||||||
|
|
||||||
// Mark file as trash and schedule it's deletion.
|
// Mark file as trash and schedule it's deletion. If force_bg is set, it
|
||||||
|
// forces the file to be deleting in the background regardless of DB size,
|
||||||
|
// except when rate limited delete is disabled
|
||||||
virtual Status ScheduleFileDeletion(const std::string& file_path,
|
virtual Status ScheduleFileDeletion(const std::string& file_path,
|
||||||
const std::string& dir_to_sync);
|
const std::string& dir_to_sync,
|
||||||
|
const bool force_bg = false);
|
||||||
|
|
||||||
// Wait for all files being deleteing in the background to finish or for
|
// Wait for all files being deleteing in the background to finish or for
|
||||||
// destructor to be called.
|
// destructor to be called.
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include "util/cast_util.h"
|
#include "util/cast_util.h"
|
||||||
#include "util/crc32c.h"
|
#include "util/crc32c.h"
|
||||||
#include "util/file_reader_writer.h"
|
#include "util/file_reader_writer.h"
|
||||||
|
#include "util/file_util.h"
|
||||||
#include "util/filename.h"
|
#include "util/filename.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/mutexlock.h"
|
#include "util/mutexlock.h"
|
||||||
@ -1745,7 +1746,8 @@ std::pair<bool, int64_t> BlobDBImpl::DeleteObsoleteFiles(bool aborted) {
|
|||||||
bfile->PathName().c_str());
|
bfile->PathName().c_str());
|
||||||
|
|
||||||
blob_files_.erase(bfile->BlobFileNumber());
|
blob_files_.erase(bfile->BlobFileNumber());
|
||||||
Status s = env_->DeleteFile(bfile->PathName());
|
Status s = DeleteDBFile(&(db_impl_->immutable_db_options()),
|
||||||
|
bfile->PathName(), blob_dir_, true);
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
ROCKS_LOG_ERROR(db_options_.info_log,
|
ROCKS_LOG_ERROR(db_options_.info_log,
|
||||||
"File failed to be deleted as obsolete %s",
|
"File failed to be deleted as obsolete %s",
|
||||||
@ -1835,7 +1837,7 @@ Status DestroyBlobDB(const std::string& dbname, const Options& options,
|
|||||||
uint64_t number;
|
uint64_t number;
|
||||||
FileType type;
|
FileType type;
|
||||||
if (ParseFileName(f, &number, &type) && type == kBlobFile) {
|
if (ParseFileName(f, &number, &type) && type == kBlobFile) {
|
||||||
Status del = env->DeleteFile(blobdir + "/" + f);
|
Status del = DeleteDBFile(&soptions, blobdir + "/" + f, blobdir, true);
|
||||||
if (status.ok() && !del.ok()) {
|
if (status.ok() && !del.ok()) {
|
||||||
status = del;
|
status = del;
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "util/cast_util.h"
|
#include "util/cast_util.h"
|
||||||
#include "util/fault_injection_test_env.h"
|
#include "util/fault_injection_test_env.h"
|
||||||
#include "util/random.h"
|
#include "util/random.h"
|
||||||
|
#include "util/sst_file_manager_impl.h"
|
||||||
#include "util/string_util.h"
|
#include "util/string_util.h"
|
||||||
#include "util/sync_point.h"
|
#include "util/sync_point.h"
|
||||||
#include "util/testharness.h"
|
#include "util/testharness.h"
|
||||||
@ -762,6 +763,52 @@ TEST_F(BlobDBTest, ReadWhileGC) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(BlobDBTest, SstFileManager) {
|
||||||
|
// run the same test for Get(), MultiGet() and Iterator each.
|
||||||
|
std::shared_ptr<SstFileManager> sst_file_manager(
|
||||||
|
NewSstFileManager(mock_env_.get()));
|
||||||
|
sst_file_manager->SetDeleteRateBytesPerSecond(1);
|
||||||
|
SstFileManagerImpl *sfm =
|
||||||
|
static_cast<SstFileManagerImpl *>(sst_file_manager.get());
|
||||||
|
|
||||||
|
BlobDBOptions bdb_options;
|
||||||
|
bdb_options.min_blob_size = 0;
|
||||||
|
Options db_options;
|
||||||
|
|
||||||
|
int files_deleted_directly = 0;
|
||||||
|
int files_scheduled_to_delete = 0;
|
||||||
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"SstFileManagerImpl::ScheduleFileDeletion",
|
||||||
|
[&](void * /*arg*/) { files_scheduled_to_delete++; });
|
||||||
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::DeleteFile",
|
||||||
|
[&](void * /*arg*/) { files_deleted_directly++; });
|
||||||
|
SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
db_options.sst_file_manager = sst_file_manager;
|
||||||
|
|
||||||
|
Open(bdb_options, db_options);
|
||||||
|
|
||||||
|
// Create one obselete file and clean it.
|
||||||
|
blob_db_->Put(WriteOptions(), "foo", "bar");
|
||||||
|
auto blob_files = blob_db_impl()->TEST_GetBlobFiles();
|
||||||
|
ASSERT_EQ(1, blob_files.size());
|
||||||
|
std::shared_ptr<BlobFile> bfile = blob_files[0];
|
||||||
|
ASSERT_OK(blob_db_impl()->TEST_CloseBlobFile(bfile));
|
||||||
|
GCStats gc_stats;
|
||||||
|
ASSERT_OK(blob_db_impl()->TEST_GCFileAndUpdateLSM(bfile, &gc_stats));
|
||||||
|
blob_db_impl()->TEST_DeleteObsoleteFiles();
|
||||||
|
|
||||||
|
// Even if SSTFileManager is not set, DB is creating a dummy one.
|
||||||
|
ASSERT_EQ(1, files_scheduled_to_delete);
|
||||||
|
ASSERT_EQ(0, files_deleted_directly);
|
||||||
|
Destroy();
|
||||||
|
// Make sure that DestroyBlobDB() also goes through delete scheduler.
|
||||||
|
ASSERT_GE(2, files_scheduled_to_delete);
|
||||||
|
ASSERT_EQ(0, files_deleted_directly);
|
||||||
|
SyncPoint::GetInstance()->DisableProcessing();
|
||||||
|
sfm->WaitForEmptyTrash();
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(BlobDBTest, SnapshotAndGarbageCollection) {
|
TEST_F(BlobDBTest, SnapshotAndGarbageCollection) {
|
||||||
BlobDBOptions bdb_options;
|
BlobDBOptions bdb_options;
|
||||||
bdb_options.min_blob_size = 0;
|
bdb_options.min_blob_size = 0;
|
||||||
|
@ -164,6 +164,16 @@ class CheckpointTest : public testing::Test {
|
|||||||
return DB::OpenForReadOnly(options, dbname_, &db_);
|
return DB::OpenForReadOnly(options, dbname_, &db_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Status ReadOnlyReopenWithColumnFamilies(const std::vector<std::string>& cfs,
|
||||||
|
const Options& options) {
|
||||||
|
std::vector<ColumnFamilyDescriptor> column_families;
|
||||||
|
for (const auto& cf : cfs) {
|
||||||
|
column_families.emplace_back(cf, options);
|
||||||
|
}
|
||||||
|
return DB::OpenForReadOnly(options, dbname_, column_families, &handles_,
|
||||||
|
&db_);
|
||||||
|
}
|
||||||
|
|
||||||
Status TryReopen(const Options& options) {
|
Status TryReopen(const Options& options) {
|
||||||
Close();
|
Close();
|
||||||
last_options_ = options;
|
last_options_ = options;
|
||||||
@ -612,6 +622,69 @@ TEST_F(CheckpointTest, CheckpointWithUnsyncedDataDropped) {
|
|||||||
db_ = nullptr;
|
db_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(CheckpointTest, CheckpointReadOnlyDB) {
|
||||||
|
ASSERT_OK(Put("foo", "foo_value"));
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
Close();
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
ASSERT_OK(ReadOnlyReopen(options));
|
||||||
|
Checkpoint* checkpoint = nullptr;
|
||||||
|
ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
|
||||||
|
ASSERT_OK(checkpoint->CreateCheckpoint(snapshot_name_));
|
||||||
|
delete checkpoint;
|
||||||
|
checkpoint = nullptr;
|
||||||
|
Close();
|
||||||
|
DB* snapshot_db = nullptr;
|
||||||
|
ASSERT_OK(DB::Open(options, snapshot_name_, &snapshot_db));
|
||||||
|
ReadOptions read_opts;
|
||||||
|
std::string get_result;
|
||||||
|
ASSERT_OK(snapshot_db->Get(read_opts, "foo", &get_result));
|
||||||
|
ASSERT_EQ("foo_value", get_result);
|
||||||
|
delete snapshot_db;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(CheckpointTest, CheckpointReadOnlyDBWithMultipleColumnFamilies) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
CreateAndReopenWithCF({"pikachu", "eevee"}, options);
|
||||||
|
for (int i = 0; i != 3; ++i) {
|
||||||
|
ASSERT_OK(Put(i, "foo", "foo_value"));
|
||||||
|
ASSERT_OK(Flush(i));
|
||||||
|
}
|
||||||
|
Close();
|
||||||
|
Status s = ReadOnlyReopenWithColumnFamilies(
|
||||||
|
{kDefaultColumnFamilyName, "pikachu", "eevee"}, options);
|
||||||
|
ASSERT_OK(s);
|
||||||
|
Checkpoint* checkpoint = nullptr;
|
||||||
|
ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
|
||||||
|
ASSERT_OK(checkpoint->CreateCheckpoint(snapshot_name_));
|
||||||
|
delete checkpoint;
|
||||||
|
checkpoint = nullptr;
|
||||||
|
Close();
|
||||||
|
|
||||||
|
std::vector<ColumnFamilyDescriptor> column_families{
|
||||||
|
{kDefaultColumnFamilyName, options},
|
||||||
|
{"pikachu", options},
|
||||||
|
{"eevee", options}};
|
||||||
|
DB* snapshot_db = nullptr;
|
||||||
|
std::vector<ColumnFamilyHandle*> snapshot_handles;
|
||||||
|
s = DB::Open(options, snapshot_name_, column_families, &snapshot_handles,
|
||||||
|
&snapshot_db);
|
||||||
|
ASSERT_OK(s);
|
||||||
|
ReadOptions read_opts;
|
||||||
|
for (int i = 0; i != 3; ++i) {
|
||||||
|
std::string get_result;
|
||||||
|
s = snapshot_db->Get(read_opts, snapshot_handles[i], "foo", &get_result);
|
||||||
|
ASSERT_OK(s);
|
||||||
|
ASSERT_EQ("foo_value", get_result);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto snapshot_h : snapshot_handles) {
|
||||||
|
delete snapshot_h;
|
||||||
|
}
|
||||||
|
snapshot_handles.clear();
|
||||||
|
delete snapshot_db;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
@ -19,7 +19,7 @@ Status GetAllKeyVersions(DB* db, Slice begin_key, Slice end_key,
|
|||||||
|
|
||||||
DBImpl* idb = static_cast<DBImpl*>(db->GetRootDB());
|
DBImpl* idb = static_cast<DBImpl*>(db->GetRootDB());
|
||||||
auto icmp = InternalKeyComparator(idb->GetOptions().comparator);
|
auto icmp = InternalKeyComparator(idb->GetOptions().comparator);
|
||||||
RangeDelAggregatorV2 range_del_agg(&icmp,
|
ReadRangeDelAggregator range_del_agg(&icmp,
|
||||||
kMaxSequenceNumber /* upper_bound */);
|
kMaxSequenceNumber /* upper_bound */);
|
||||||
Arena arena;
|
Arena arena;
|
||||||
ScopedArenaIterator iter(
|
ScopedArenaIterator iter(
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user