Compare commits

...

21 Commits
main ... 4.8.fb

Author SHA1 Message Date
sdong
52a2cc5a6d Disable error as warning 2019-11-05 11:04:14 -08:00
sdong
d60acee274 Add one more #include<functional> 2019-11-05 11:04:14 -08:00
sdong
84b27f480f Add some include<functional> 2019-10-31 14:28:16 -07:00
sdong
92bf1f5773 [FB Internal] Point to the latest tool chain. 2019-10-31 14:28:08 -07:00
sdong
6672cd0770 [fb only] revert unintended change of USE_SSE
The previuos change that use gcc-5 set USE_SSE to wrong flag by mistake. Fix it.
2017-07-17 22:22:16 -07:00
sdong
1d0562e7c8 [FB Only] use gcc-5 2017-07-17 21:53:13 -07:00
Islam AbdelRahman
3f96ed111e backport fbcode gcc path fix 2016-06-08 13:08:36 -07:00
Yi Wu
0645e8b42c Fix win build
Summary: Fixing error with win build where we compare int64_t with size_t.

Test Plan: make check

Reviewers: andrewkr

Reviewed By: andrewkr

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D57885
2016-05-09 13:43:05 -07:00
Islam AbdelRahman
6504c31cb6 Fix valgrind (DBIteratorTest.ReadAhead)
Summary: This test is failing under valgrind because we dont delete the Env that we allocated

Test Plan: run the test under valgrind

Reviewers: andrewkr, yhchiang, yiwu, sdong

Reviewed By: sdong

Subscribers: andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D57693
2016-05-09 13:42:56 -07:00
Andrew Kryczka
53a86bf61e Make max_dict_bytes optional in options string
Summary:
For backwards compatibility with older option strings, the parser needs
to treat this argument as optional.

Test Plan:
Updated unit test to cover case where compression_opts is present but
max_dict_bytes is omitted.

Reviewers: MarkCallaghan, sdong, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D57759
2016-05-06 11:39:26 -07:00
sdong
8a5ec0ec6f OptimizeForSmallDb(): revert some options whose defaults were just changed
Summary: We changed default options of max_open_files and max_file_opening_threads but didn't revert it in OptimizeForSmallDb().

Test Plan: Add a unit test

Reviewers: igor, yhchiang, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D57675
2016-05-06 10:18:34 -07:00
sdong
b7dbbdf783 BlockBasedTable::Get() not to use prefix bloom if read_options.total_order_seek = true
Summary: This is to provide a way for users to skip prefix bloom in point look-up.

Test Plan: Add a new unit test scenario.

Reviewers: IslamAbdelRahman

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D57747
2016-05-06 10:18:03 -07:00
Yi Wu
ec64b24a0c Fixing lite build
Summary: Fixing lite build broke in unit test. `FilesPerLevel()` depends on `DB::GetProperty()`, which lite build doesn't support.

Test Plan: OPT=-DROCKSDB_LITE make check -j64

Reviewers: sdong

Reviewed By: sdong

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D57651
2016-05-05 10:09:16 -07:00
Yi Wu
2182bf2828 Enable configurable readahead for iterators
Summary:
Add an option `iterator_readahead_size` to `ReadOptions` to enable
configurable readahead for iterators similar to the corresponding
option for compaction.

Test Plan:
```
make commit_prereq
```

Reviewers: kumar.rangarajan, ott, igor, sdong

Reviewed By: sdong

Subscribers: yiwu, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D55419
2016-05-05 10:09:16 -07:00
Islam AbdelRahman
2137377f0e Fix Iterator::Prev memory pinning bug
Summary: We should not use IterKey::SetKey with copy = false except if we are pinning the iterator thru it's life time, otherwise we may release the temporarily pinned blocks and in this case the IterKey will be pointing to freed memory

Test Plan: added a new test

Reviewers: sdong, andrewkr

Reviewed By: andrewkr

Subscribers: andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D57561
2016-05-05 10:09:16 -07:00
Patrick Chan
a9f71f1b6e sst_dump won't print size for unsupported compression type 2016-05-05 10:09:16 -07:00
Islam AbdelRahman
1f9508953f Eliminate memcpy in Iterator::Prev() by pinning blocks for keys spanning multiple blocks
Summary:
This diff is stacked on top of this diff https://reviews.facebook.net/D56493
The current Iterator::Prev() implementation need to copy every value since the underlying Iterator may move after reading the value.
This can be optimized by making sure that the block containing the value is pinned until the Iterator move. which will improve the throughput by up to 1.5X

master
```
==> 1000000_Keys_100Byte.txt <==
readreverse  :       0.449 micros/op 2225887 ops/sec;  246.2 MB/s
readreverse  :       0.433 micros/op 2311508 ops/sec;  255.7 MB/s
readreverse  :       0.436 micros/op 2294335 ops/sec;  253.8 MB/s
readreverse  :       0.471 micros/op 2121295 ops/sec;  234.7 MB/s
readreverse  :       0.465 micros/op 2152227 ops/sec;  238.1 MB/s
readreverse  :       0.454 micros/op 2203011 ops/sec;  243.7 MB/s
readreverse  :       0.451 micros/op 2216095 ops/sec;  245.2 MB/s
readreverse  :       0.462 micros/op 2162447 ops/sec;  239.2 MB/s
readreverse  :       0.476 micros/op 2099151 ops/sec;  232.2 MB/s
readreverse  :       0.472 micros/op 2120710 ops/sec;  234.6 MB/s

avg : 242.34 MB/s

==> 1000000_Keys_1KB.txt <==
readreverse  :       1.013 micros/op 986793 ops/sec;  978.7 MB/s
readreverse  :       0.942 micros/op 1061136 ops/sec; 1052.5 MB/s
readreverse  :       0.951 micros/op 1051901 ops/sec; 1043.3 MB/s
readreverse  :       0.932 micros/op 1072894 ops/sec; 1064.1 MB/s
readreverse  :       1.024 micros/op 976720 ops/sec;  968.7 MB/s
readreverse  :       0.935 micros/op 1069169 ops/sec; 1060.4 MB/s
readreverse  :       1.012 micros/op 988132 ops/sec;  980.1 MB/s
readreverse  :       0.962 micros/op 1039579 ops/sec; 1031.1 MB/s
readreverse  :       0.991 micros/op 1008924 ops/sec; 1000.7 MB/s
readreverse  :       1.004 micros/op 996144 ops/sec;  988.0 MB/s

avg : 1016.76 MB/s

==> 1000000_Keys_10KB.txt <==
readreverse  :       4.167 micros/op 239952 ops/sec; 2346.9 MB/s
readreverse  :       4.070 micros/op 245713 ops/sec; 2403.3 MB/s
readreverse  :       4.572 micros/op 218733 ops/sec; 2139.4 MB/s
readreverse  :       4.497 micros/op 222388 ops/sec; 2175.2 MB/s
readreverse  :       4.203 micros/op 237920 ops/sec; 2327.1 MB/s
readreverse  :       4.206 micros/op 237756 ops/sec; 2325.5 MB/s
readreverse  :       4.181 micros/op 239149 ops/sec; 2339.1 MB/s
readreverse  :       4.157 micros/op 240552 ops/sec; 2352.8 MB/s
readreverse  :       4.187 micros/op 238848 ops/sec; 2336.1 MB/s
readreverse  :       4.106 micros/op 243575 ops/sec; 2382.4 MB/s

avg : 2312.78 MB/s

==> 100000_Keys_100KB.txt <==
readreverse  :      41.281 micros/op 24224 ops/sec; 2366.0 MB/s
readreverse  :      39.722 micros/op 25175 ops/sec; 2458.9 MB/s
readreverse  :      40.319 micros/op 24802 ops/sec; 2422.5 MB/s
readreverse  :      39.762 micros/op 25149 ops/sec; 2456.4 MB/s
readreverse  :      40.916 micros/op 24440 ops/sec; 2387.1 MB/s
readreverse  :      41.188 micros/op 24278 ops/sec; 2371.4 MB/s
readreverse  :      40.061 micros/op 24962 ops/sec; 2438.1 MB/s
readreverse  :      40.221 micros/op 24862 ops/sec; 2428.4 MB/s
readreverse  :      40.084 micros/op 24947 ops/sec; 2436.7 MB/s
readreverse  :      40.655 micros/op 24597 ops/sec; 2402.4 MB/s

avg : 2416.79 MB/s

==> 10000_Keys_1MB.txt <==
readreverse  :     298.038 micros/op 3355 ops/sec; 3355.3 MB/s
readreverse  :     335.001 micros/op 2985 ops/sec; 2985.1 MB/s
readreverse  :     286.956 micros/op 3484 ops/sec; 3484.9 MB/s
readreverse  :     329.954 micros/op 3030 ops/sec; 3030.8 MB/s
readreverse  :     306.428 micros/op 3263 ops/sec; 3263.5 MB/s
readreverse  :     330.749 micros/op 3023 ops/sec; 3023.5 MB/s
readreverse  :     328.903 micros/op 3040 ops/sec; 3040.5 MB/s
readreverse  :     324.853 micros/op 3078 ops/sec; 3078.4 MB/s
readreverse  :     320.488 micros/op 3120 ops/sec; 3120.3 MB/s
readreverse  :     320.536 micros/op 3119 ops/sec; 3119.8 MB/s

avg : 3150.21 MB/s
```

After memcpy elimination
```

==> 1000000_Keys_100Byte.txt <==
readreverse  :       0.395 micros/op 2529890 ops/sec;  279.9 MB/s
readreverse  :       0.368 micros/op 2715922 ops/sec;  300.5 MB/s
readreverse  :       0.384 micros/op 2603929 ops/sec;  288.1 MB/s
readreverse  :       0.375 micros/op 2663286 ops/sec;  294.6 MB/s
readreverse  :       0.357 micros/op 2802180 ops/sec;  310.0 MB/s
readreverse  :       0.363 micros/op 2757684 ops/sec;  305.1 MB/s
readreverse  :       0.372 micros/op 2689603 ops/sec;  297.5 MB/s
readreverse  :       0.379 micros/op 2638599 ops/sec;  291.9 MB/s
readreverse  :       0.375 micros/op 2663803 ops/sec;  294.7 MB/s
readreverse  :       0.375 micros/op 2665579 ops/sec;  294.9 MB/s

avg: 295.72 MB/s (1.22 X)

==> 1000000_Keys_1KB.txt <==
readreverse  :       0.879 micros/op 1138112 ops/sec; 1128.8 MB/s
readreverse  :       0.842 micros/op 1187998 ops/sec; 1178.3 MB/s
readreverse  :       0.837 micros/op 1194915 ops/sec; 1185.1 MB/s
readreverse  :       0.845 micros/op 1182983 ops/sec; 1173.3 MB/s
readreverse  :       0.877 micros/op 1140308 ops/sec; 1131.0 MB/s
readreverse  :       0.849 micros/op 1177581 ops/sec; 1168.0 MB/s
readreverse  :       0.915 micros/op 1093284 ops/sec; 1084.3 MB/s
readreverse  :       0.863 micros/op 1159418 ops/sec; 1149.9 MB/s
readreverse  :       0.895 micros/op 1117670 ops/sec; 1108.5 MB/s
readreverse  :       0.852 micros/op 1174116 ops/sec; 1164.5 MB/s

avg: 1147.17 MB/s (1.12 X)

==> 1000000_Keys_10KB.txt <==
readreverse  :       3.870 micros/op 258386 ops/sec; 2527.2 MB/s
readreverse  :       3.568 micros/op 280296 ops/sec; 2741.5 MB/s
readreverse  :       4.005 micros/op 249694 ops/sec; 2442.2 MB/s
readreverse  :       3.550 micros/op 281719 ops/sec; 2755.5 MB/s
readreverse  :       3.562 micros/op 280758 ops/sec; 2746.1 MB/s
readreverse  :       3.507 micros/op 285125 ops/sec; 2788.8 MB/s
readreverse  :       3.463 micros/op 288739 ops/sec; 2824.1 MB/s
readreverse  :       3.428 micros/op 291734 ops/sec; 2853.4 MB/s
readreverse  :       3.553 micros/op 281491 ops/sec; 2753.2 MB/s
readreverse  :       3.535 micros/op 282885 ops/sec; 2766.9 MB/s

avg : 2719.89 MB/s (1.17 X)

==> 100000_Keys_100KB.txt <==
readreverse  :      22.815 micros/op 43830 ops/sec; 4281.0 MB/s
readreverse  :      29.957 micros/op 33381 ops/sec; 3260.4 MB/s
readreverse  :      25.334 micros/op 39473 ops/sec; 3855.4 MB/s
readreverse  :      23.037 micros/op 43409 ops/sec; 4239.8 MB/s
readreverse  :      27.810 micros/op 35958 ops/sec; 3512.1 MB/s
readreverse  :      30.327 micros/op 32973 ops/sec; 3220.6 MB/s
readreverse  :      29.704 micros/op 33665 ops/sec; 3288.2 MB/s
readreverse  :      29.423 micros/op 33987 ops/sec; 3319.6 MB/s
readreverse  :      23.334 micros/op 42856 ops/sec; 4185.9 MB/s
readreverse  :      29.969 micros/op 33368 ops/sec; 3259.1 MB/s

avg : 3642.21 MB/s (1.5 X)

==> 10000_Keys_1MB.txt <==
readreverse  :     244.748 micros/op 4085 ops/sec; 4085.9 MB/s
readreverse  :     230.208 micros/op 4343 ops/sec; 4344.0 MB/s
readreverse  :     235.655 micros/op 4243 ops/sec; 4243.6 MB/s
readreverse  :     235.730 micros/op 4242 ops/sec; 4242.2 MB/s
readreverse  :     237.346 micros/op 4213 ops/sec; 4213.3 MB/s
readreverse  :     227.306 micros/op 4399 ops/sec; 4399.4 MB/s
readreverse  :     194.957 micros/op 5129 ops/sec; 5129.4 MB/s
readreverse  :     238.359 micros/op 4195 ops/sec; 4195.4 MB/s
readreverse  :     221.588 micros/op 4512 ops/sec; 4513.0 MB/s
readreverse  :     235.911 micros/op 4238 ops/sec; 4239.0 MB/s

avg : 4360.52 MB/s (1.38 X)
```

Test Plan: COMPILE_WITH_ASAN=1 make check -j64

Reviewers: andrewkr, yhchiang, sdong

Reviewed By: sdong

Subscribers: andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D56511
2016-05-05 10:09:16 -07:00
Yi Wu
39d156b120 Release RocksDB 4.8.0
Summary: Release RocksDB 4.8.0

Test Plan: N/A

Reviewers: sdong, IslamAbdelRahman

Reviewed By: IslamAbdelRahman

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D57525
2016-05-02 15:09:38 -07:00
Warren Falk
cb08423712 Fix #1110, 32-bit build failure on Mac OSX (#1112)
Using explicit 64-bit type in conditional in platforms above 32-bits
This appears to be necessary on Mac OSX as std::conditional does not appear to short circuit and evaluates the third template arg
Making the third template arg be 64 bits explicitly works around this problem and will work on both 32 bit and 64+ bit platforms.
2016-05-02 15:09:38 -07:00
Islam AbdelRahman
d9a7d8a769 Fix calling GetCurrentMutableCFOptions in CompactionJob::ProcessKeyValueCompaction()
Summary: GetCurrentMutableCFOptions() can only be called when DB mutex is held so we cannot call it in CompactionJob::ProcessKeyValueCompaction() since it's not holding the db mutex

Test Plan: make check -j64

Reviewers: sdong, andrewkr

Reviewed By: andrewkr

Subscribers: andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D57471
2016-05-02 15:09:38 -07:00
Dmitri Smirnov
5407b7c6d9 Fix multiple issues with WinMmapFile fo sequential writing (#1108)
make preallocation inline with other writable files
  make sure that we map no more than pre-allocated size.
2016-05-02 15:09:38 -07:00
29 changed files with 618 additions and 222 deletions

View File

@ -1,9 +1,9 @@
## Unreleased
# RocksDB default options change log
## 4.8.0 (5/2/2016)
* options.max_open_files changes from 5000 to -1. It improves performance, but users need to set file descriptor limit to be large enough and watch memory usage for index and bloom filters.
* options.base_background_compactions changes from max_background_compactions to 1. When users set higher max_background_compactions but the write throughput is not high, the writes are less spiky to disks.
* options.wal_recovery_mode changes from kTolerateCorruptedTailRecords to kPointInTimeRecovery. Avoid some false positive when file system or hardware reorder the writes for file data and metadata.
# RocksDB default options change log
## 4.7.0 (4/8/2016)
* options.write_buffer_size changes from 4MB to 64MB.
* options.target_file_size_base changes from 2MB to 64MB.

View File

@ -1,10 +1,12 @@
# Rocksdb Change Log
## Unreleased
## 4.8.0 (5/2/2016)
### Public API Change
* Allow preset compression dictionary for improved compression of block-based tables. This is supported for zlib, zstd, and lz4. The compression dictionary's size is configurable via CompressionOptions::max_dict_bytes.
* Delete deprecated classes for creating backups (BackupableDB) and restoring from backups (RestoreBackupableDB). Now, BackupEngine should be used for creating backups, and BackupEngineReadOnly should be used for restorations. For more details, see https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F
* Expose estimate of per-level compression ratio via DB property: "rocksdb.compression-ratio-at-levelN".
* Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status.
### New Features
* Add ReadOptions::readahead_size. If non-zero, NewIterator will create a new table reader which performs reads of the given size.
## 4.7.0 (4/8/2016)
### Public API Change

View File

@ -203,10 +203,6 @@ default: all
WARNING_FLAGS = -W -Wextra -Wall -Wsign-compare -Wshadow \
-Wno-unused-parameter
ifndef DISABLE_WARNING_AS_ERROR
WARNING_FLAGS += -Werror
endif
CFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
CXXFLAGS += $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers

View File

@ -52,12 +52,7 @@ if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
FBCODE_BUILD="true"
# If we're compiling with TSAN we need pic build
PIC_BUILD=$COMPILE_WITH_TSAN
if [ -z "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
source "$PWD/build_tools/fbcode_config.sh"
else
# we need this to build with MySQL. Don't use for other purposes.
source "$PWD/build_tools/fbcode_config4.8.1.sh"
fi
source "$PWD/build_tools/fbcode_config.sh"
fi
# Delete existing output, if it exists

View File

@ -1,16 +1,19 @@
GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.9.x/centos6-native/1317bc4/
CLANG_BASE=/mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/9d9ecb9/
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/f97108c12512b3b0789ac4515d836bdb1eae1142/4.9.x/gcc-4.9-glibc-2.20/024dbc3
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/f5484f168c0e4d19823d41df052c5870c6e575a4/2.20/gcc-4.9-glibc-2.20/500e281
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/cbf6f1f209e5bd160bdc5d971744e039f36b1566/1.1.3/gcc-4.9-glibc-2.20/e9936bf
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/6d39cb54708049f527e713ad19f2aadb9d3667e8/1.2.8/gcc-4.9-glibc-2.20/e9936bf
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/2ddd45f0853bfc8bb1c27f0f447236a1a26c338a/1.0.6/gcc-4.9-glibc-2.20/e9936bf
LZ4_BASE=/mnt/gvfs/third-party2/lz4/6858fac689e0f92e584224d91bdb0e39f6c8320d/r131/gcc-4.9-glibc-2.20/e9936bf
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/cb6c4880fcb4fee471574ba6af63a3882155a16a/0.5.1/gcc-4.9-glibc-2.20/e9936bf
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/c7275a4ceae0aca0929e56964a31dafc53c1ee96/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/d9acac8a9a2c9378eb696e22ffa8bd0e97d9206b/master/gcc-4.9-glibc-2.20/a6c5e1e
NUMA_BASE=/mnt/gvfs/third-party2/numa/ae54a5ed22cdabb1c6446dce4e8ffae5b4446d73/2.0.8/gcc-4.9-glibc-2.20/e9936bf
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/41bfa3759ce52c071f5fd547ec9ecd2522929f0a/trunk/gcc-4.9-glibc-2.20/12266b1
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/1a48835975c66d30e47770ec419758ed3b9ba010/3.10.62-62_fbk17_03959_ge29cc63/gcc-4.9-glibc-2.20/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/71454c53fffcb716a0beb9a90047aff7fb5c984a/2.26/centos6-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/af85c56f424cd5edfc2c97588299b44ecdec96bb/3.10.0/gcc-4.9-glibc-2.20/e9936bf
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/7.x/centos7-native/b2ef2b6
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/963d9aeda70cc4779885b1277484fe7544a04e3e/9.0.0/platform007/9e92d53/
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/7.x/platform007/5620abc
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.26/platform007/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/platform007/ca4da3d
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/platform007/ca4da3d
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/platform007/15a3614
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/platform007/c26c002
NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/platform007/ca4da3d
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/platform007/6f3e0a9
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/platform007/ca4da3d
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/fb/platform007/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/ab9f09bba370e7066cafd4eb59752db93f2e8312/2.29.1/platform007/15a3614
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/platform007/ca4da3d
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832

View File

@ -1,4 +1,4 @@
GCC_BASE=/mnt/vol/engshare/fbcode/third-party2/gcc/4.8.1/centos6-native/cc6c9dc/
GCC_BASE=/mnt/gvfs/third-party2/gcc/ebc96bc2fb751b5a0300b8d91a95bdf24ac1d88b/4.8.1/centos6-native/cc6c9dc
CLANG_BASE=/mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/9d9ecb9/
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/f97108c12512b3b0789ac4515d836bdb1eae1142/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/f5484f168c0e4d19823d41df052c5870c6e575a4/2.17/gcc-4.8.1-glibc-2.17/99df8fc

View File

@ -13,8 +13,8 @@ source "$BASEDIR/dependencies.sh"
CFLAGS=""
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
LIBGCC_LIBS=" -L $LIBGCC_BASE/libs"
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
@ -43,12 +43,16 @@ if test -z $PIC_BUILD; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
CFLAGS+=" -DZSTD"
fi
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD"
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
if test -z $PIC_BUILD; then
@ -56,7 +60,7 @@ if test -z $PIC_BUILD; then
else
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
fi
CFLAGS+=" -DGFLAGS=google"
CFLAGS+=" -DGFLAGS=gflags"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
@ -72,13 +76,22 @@ if test -z $PIC_BUILD; then
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
fi
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
if test -z $PIC_BUILD; then
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
else
TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
fi
CFLAGS+=" -DTBB"
# use Intel SSE support for checksum calculations
export USE_SSE=1
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
@ -87,7 +100,7 @@ CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/clang/tools/scan-build/scan-build"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build"
if [ -z "$USE_CLANG" ]; then
# gcc
@ -95,41 +108,45 @@ if [ -z "$USE_CLANG" ]; then
CXX="$GCC_BASE/bin/g++"
CFLAGS+=" -B$BINUTILS/gold"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/*/include"
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.9.x/x86_64-facebook-linux "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CFLAGS+=" -Wno-expansion-to-defined "
CXXFLAGS="-nostdinc++"
fi
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.9-glibc-2.20/lib/ld.so"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.9-glibc-2.20/lib"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD

View File

@ -64,7 +64,7 @@ touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.9.x/centos6-native/*/ | head -n1`
GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.9.x/centos6-native/*/`
CLANG_BASE=`ls -d1 /mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/*/ | head -n1`
log_variable GCC_BASE
@ -101,7 +101,7 @@ touch "$OUTPUT"
echo "Writing 4.8.1 dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`ls -d1 $TP2_LATEST/gcc/4.8.1/centos6-native/*/ | head -n1`
GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.8.1/centos6-native/*/`
CLANG_BASE=`ls -d1 /mnt/gvfs/third-party2/clang/fc904e50a9266b9d7b98cae1993afa0c5aae1440/3.7.1/centos6-native/*/ | head -n1`
log_variable GCC_BASE

View File

@ -149,7 +149,7 @@ ColumnFamilyOptions SanitizeOptions(const DBOptions& db_options,
result.comparator = icmp;
size_t clamp_max = std::conditional<
sizeof(size_t) == 4, std::integral_constant<size_t, 0xffffffff>,
std::integral_constant<size_t, 64ull << 30>>::type::value;
std::integral_constant<uint64_t, 64ull << 30>>::type::value;
ClipToRange(&result.write_buffer_size, ((size_t)64) << 10, clamp_max);
// if user sets arena_block_size, we trust user to use this value. Otherwise,
// calculate a proper value from writer_buffer_size;

View File

@ -209,7 +209,7 @@ class ColumnFamilyData {
const ImmutableCFOptions* ioptions() const { return &ioptions_; }
// REQUIRES: DB mutex held
// This returns the MutableCFOptions used by current SuperVersion
// You shoul use this API to reference MutableCFOptions most of the time.
// You should use this API to reference MutableCFOptions most of the time.
const MutableCFOptions* GetCurrentMutableCFOptions() const {
return &(super_version_->mutable_cf_options);
}

View File

@ -669,6 +669,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
}
ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
const MutableCFOptions* mutable_cf_options =
sub_compact->compaction->mutable_cf_options();
// To build compression dictionary, we sample the first output file, assuming
// it'll reach the maximum length, and then use the dictionary for compressing
@ -680,9 +682,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
cfd->ioptions()->compression_opts.max_dict_bytes > 0) {
const size_t kMaxSamples =
cfd->ioptions()->compression_opts.max_dict_bytes >> kSampleLenShift;
const size_t kOutFileLen =
cfd->GetCurrentMutableCFOptions()->MaxFileSizeForLevel(
compact_->compaction->output_level());
const size_t kOutFileLen = mutable_cf_options->MaxFileSizeForLevel(
compact_->compaction->output_level());
if (kOutFileLen != port::kMaxSizet) {
const size_t kOutFileNumSamples = kOutFileLen >> kSampleLenShift;
Random64 generator{versions_->NewFileNumber()};

View File

@ -160,6 +160,10 @@ TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) {
ASSERT_EQ("NOT_FOUND", Get("foobarbar"));
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
ro.total_order_seek = true;
ASSERT_TRUE(db_->Get(ro, "foobarbar", &value).IsNotFound());
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2);
}
TEST_F(DBBloomFilterTest, WholeKeyFilterProp) {

View File

@ -131,9 +131,8 @@ class DBIter: public Iterator {
}
}
virtual ~DBIter() {
if (pin_thru_lifetime_) {
pinned_iters_mgr_.ReleasePinnedIterators();
}
// Release pinned data if any
pinned_iters_mgr_.ReleasePinnedIterators();
RecordTick(statistics_, NO_ITERATORS, -1);
local_stats_.BumpGlobalStatistics(statistics_);
if (!arena_mode_) {
@ -154,8 +153,13 @@ class DBIter: public Iterator {
}
virtual Slice value() const override {
assert(valid_);
return (direction_ == kForward && !current_entry_is_merged_) ?
iter_->value() : saved_value_;
if (current_entry_is_merged_) {
return saved_value_;
} else if (direction_ == kReverse) {
return pinned_value_;
} else {
return iter_->value();
}
}
virtual Status status() const override {
if (status_.ok()) {
@ -206,6 +210,21 @@ class DBIter: public Iterator {
bool ParseKey(ParsedInternalKey* key);
void MergeValuesNewToOld();
// Temporarily pin the blocks that we encounter until ReleaseTempPinnedData()
// is called
void TempPinData() {
if (!pin_thru_lifetime_) {
pinned_iters_mgr_.StartPinning();
}
}
// Release blocks pinned by TempPinData()
void ReleaseTempPinnedData() {
if (!pin_thru_lifetime_) {
pinned_iters_mgr_.ReleasePinnedIterators();
}
}
inline void ClearSavedValue() {
if (saved_value_.capacity() > 1048576) {
std::string empty;
@ -227,6 +246,7 @@ class DBIter: public Iterator {
Status status_;
IterKey saved_key_;
std::string saved_value_;
Slice pinned_value_;
Direction direction_;
bool valid_;
bool current_entry_is_merged_;
@ -266,6 +286,8 @@ void DBIter::Next() {
assert(valid_);
if (direction_ == kReverse) {
// We only pin blocks when doing kReverse
ReleaseTempPinnedData();
FindNextUserKey();
direction_ = kForward;
if (!iter_->Valid()) {
@ -347,21 +369,24 @@ void DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
case kTypeSingleDeletion:
// Arrange to skip all upcoming entries for this key since
// they are hidden by this deletion.
saved_key_.SetKey(ikey.user_key,
!iter_->IsKeyPinned() /* copy */);
saved_key_.SetKey(
ikey.user_key,
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
skipping = true;
num_skipped = 0;
PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
break;
case kTypeValue:
valid_ = true;
saved_key_.SetKey(ikey.user_key,
!iter_->IsKeyPinned() /* copy */);
saved_key_.SetKey(
ikey.user_key,
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
return;
case kTypeMerge:
// By now, we are sure the current ikey is going to yield a value
saved_key_.SetKey(ikey.user_key,
!iter_->IsKeyPinned() /* copy */);
saved_key_.SetKey(
ikey.user_key,
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
current_entry_is_merged_ = true;
valid_ = true;
MergeValuesNewToOld(); // Go to a different state machine
@ -472,6 +497,7 @@ void DBIter::Prev() {
if (direction_ == kForward) {
ReverseToBackward();
}
ReleaseTempPinnedData();
PrevInternal();
if (statistics_ != nullptr) {
local_stats_.prev_count_++;
@ -524,7 +550,7 @@ void DBIter::PrevInternal() {
while (iter_->Valid()) {
saved_key_.SetKey(ExtractUserKey(iter_->key()),
!iter_->IsKeyPinned() /* copy */);
!iter_->IsKeyPinned() || !pin_thru_lifetime_ /* copy */);
if (FindValueForCurrentKey()) {
valid_ = true;
if (!iter_->Valid()) {
@ -555,6 +581,7 @@ void DBIter::PrevInternal() {
bool DBIter::FindValueForCurrentKey() {
assert(iter_->Valid());
merge_context_.Clear();
current_entry_is_merged_ = false;
// last entry before merge (could be kTypeDeletion, kTypeSingleDeletion or
// kTypeValue)
ValueType last_not_merge_type = kTypeDeletion;
@ -575,7 +602,9 @@ bool DBIter::FindValueForCurrentKey() {
switch (last_key_entry_type) {
case kTypeValue:
merge_context_.Clear();
saved_value_ = iter_->value().ToString();
ReleaseTempPinnedData();
TempPinData();
pinned_value_ = iter_->value();
last_not_merge_type = kTypeValue;
break;
case kTypeDeletion:
@ -605,6 +634,7 @@ bool DBIter::FindValueForCurrentKey() {
valid_ = false;
return false;
case kTypeMerge:
current_entry_is_merged_ = true;
if (last_not_merge_type == kTypeDeletion) {
StopWatchNano timer(env_, statistics_ != nullptr);
PERF_TIMER_GUARD(merge_operator_time_nanos);
@ -615,12 +645,10 @@ bool DBIter::FindValueForCurrentKey() {
timer.ElapsedNanos());
} else {
assert(last_not_merge_type == kTypeValue);
std::string last_put_value = saved_value_;
Slice temp_slice(last_put_value);
{
StopWatchNano timer(env_, statistics_ != nullptr);
PERF_TIMER_GUARD(merge_operator_time_nanos);
user_merge_operator_->FullMerge(saved_key_.GetKey(), &temp_slice,
user_merge_operator_->FullMerge(saved_key_.GetKey(), &pinned_value_,
merge_context_.GetOperands(),
&saved_value_, logger_);
RecordTick(statistics_, MERGE_OPERATION_TOTAL_TIME,
@ -655,7 +683,9 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
if (ikey.type == kTypeValue || ikey.type == kTypeDeletion ||
ikey.type == kTypeSingleDeletion) {
if (ikey.type == kTypeValue) {
saved_value_ = iter_->value().ToString();
ReleaseTempPinnedData();
TempPinData();
pinned_value_ = iter_->value();
valid_ = true;
return true;
}
@ -665,6 +695,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
// kTypeMerge. We need to collect all kTypeMerge values and save them
// in operands
current_entry_is_merged_ = true;
merge_context_.Clear();
while (iter_->Valid() &&
user_comparator_->Equal(ikey.user_key, saved_key_.GetKey()) &&
@ -767,6 +798,7 @@ void DBIter::FindParseableKey(ParsedInternalKey* ikey, Direction direction) {
void DBIter::Seek(const Slice& target) {
StopWatch sw(env_, statistics_, DB_SEEK);
ReleaseTempPinnedData();
saved_key_.Clear();
// now savved_key is used to store internal key.
saved_key_.SetInternalKey(target, sequence_);
@ -809,6 +841,7 @@ void DBIter::SeekToFirst() {
max_skip_ = std::numeric_limits<uint64_t>::max();
}
direction_ = kForward;
ReleaseTempPinnedData();
ClearSavedValue();
{
@ -841,6 +874,7 @@ void DBIter::SeekToLast() {
max_skip_ = std::numeric_limits<uint64_t>::max();
}
direction_ = kReverse;
ReleaseTempPinnedData();
ClearSavedValue();
{

View File

@ -9,6 +9,7 @@
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "rocksdb/iostats_context.h"
#include "rocksdb/perf_context.h"
namespace rocksdb {
@ -1228,6 +1229,221 @@ TEST_F(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
delete iter;
}
TEST_F(DBIteratorTest, IterPrevKeyCrossingBlocks) {
Options options = CurrentOptions();
BlockBasedTableOptions table_options;
table_options.block_size = 1; // every block will contain one entry
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
options.disable_auto_compactions = true;
options.max_sequential_skip_in_iterations = 8;
DestroyAndReopen(options);
// Putting such deletes will force DBIter::Prev() to fallback to a Seek
for (int file_num = 0; file_num < 10; file_num++) {
ASSERT_OK(Delete("key4"));
ASSERT_OK(Flush());
}
// First File containing 5 blocks of puts
ASSERT_OK(Put("key1", "val1.0"));
ASSERT_OK(Put("key2", "val2.0"));
ASSERT_OK(Put("key3", "val3.0"));
ASSERT_OK(Put("key4", "val4.0"));
ASSERT_OK(Put("key5", "val5.0"));
ASSERT_OK(Flush());
// Second file containing 9 blocks of merge operands
ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1"));
ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2"));
ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1"));
ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2"));
ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3"));
ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1"));
ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2"));
ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3"));
ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4"));
ASSERT_OK(Flush());
{
ReadOptions ro;
ro.fill_cache = false;
Iterator* iter = db_->NewIterator(ro);
iter->SeekToLast();
ASSERT_EQ(iter->key().ToString(), "key5");
ASSERT_EQ(iter->value().ToString(), "val5.0");
iter->Prev();
ASSERT_EQ(iter->key().ToString(), "key4");
ASSERT_EQ(iter->value().ToString(), "val4.0");
iter->Prev();
ASSERT_EQ(iter->key().ToString(), "key3");
ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
iter->Prev();
ASSERT_EQ(iter->key().ToString(), "key2");
ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
iter->Prev();
ASSERT_EQ(iter->key().ToString(), "key1");
ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2");
delete iter;
}
}
TEST_F(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) {
Options options = CurrentOptions();
options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
options.disable_auto_compactions = true;
options.level0_slowdown_writes_trigger = (1 << 30);
options.level0_stop_writes_trigger = (1 << 30);
options.max_sequential_skip_in_iterations = 8;
DestroyAndReopen(options);
const int kNumKeys = 500;
// Small number of merge operands to make sure that DBIter::Prev() dont
// fall back to Seek()
const int kNumMergeOperands = 3;
// Use value size that will make sure that every block contain 1 key
const int kValSize =
static_cast<int>(BlockBasedTableOptions().block_size) * 4;
// Percentage of keys that wont get merge operations
const int kNoMergeOpPercentage = 20;
// Percentage of keys that will be deleted
const int kDeletePercentage = 10;
// For half of the key range we will write multiple deletes first to
// force DBIter::Prev() to fall back to Seek()
for (int file_num = 0; file_num < 10; file_num++) {
for (int i = 0; i < kNumKeys; i += 2) {
ASSERT_OK(Delete(Key(i)));
}
ASSERT_OK(Flush());
}
Random rnd(301);
std::map<std::string, std::string> true_data;
std::string gen_key;
std::string gen_val;
for (int i = 0; i < kNumKeys; i++) {
gen_key = Key(i);
gen_val = RandomString(&rnd, kValSize);
ASSERT_OK(Put(gen_key, gen_val));
true_data[gen_key] = gen_val;
}
ASSERT_OK(Flush());
// Separate values and merge operands in different file so that we
// make sure that we dont merge them while flushing but actually
// merge them in the read path
for (int i = 0; i < kNumKeys; i++) {
if (rnd.OneIn(static_cast<int>(100.0 / kNoMergeOpPercentage))) {
// Dont give merge operations for some keys
continue;
}
for (int j = 0; j < kNumMergeOperands; j++) {
gen_key = Key(i);
gen_val = RandomString(&rnd, kValSize);
ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
true_data[gen_key] += "," + gen_val;
}
}
ASSERT_OK(Flush());
for (int i = 0; i < kNumKeys; i++) {
if (rnd.OneIn(static_cast<int>(100.0 / kDeletePercentage))) {
gen_key = Key(i);
ASSERT_OK(Delete(gen_key));
true_data.erase(gen_key);
}
}
ASSERT_OK(Flush());
{
ReadOptions ro;
ro.fill_cache = false;
Iterator* iter = db_->NewIterator(ro);
auto data_iter = true_data.rbegin();
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
ASSERT_EQ(iter->key().ToString(), data_iter->first);
ASSERT_EQ(iter->value().ToString(), data_iter->second);
data_iter++;
}
ASSERT_EQ(data_iter, true_data.rend());
delete iter;
}
{
ReadOptions ro;
ro.fill_cache = false;
Iterator* iter = db_->NewIterator(ro);
auto data_iter = true_data.rbegin();
int entries_right = 0;
std::string seek_key;
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
// Verify key/value of current position
ASSERT_EQ(iter->key().ToString(), data_iter->first);
ASSERT_EQ(iter->value().ToString(), data_iter->second);
bool restore_position_with_seek = rnd.Uniform(2);
if (restore_position_with_seek) {
seek_key = iter->key().ToString();
}
// Do some Next() operations the restore the iterator to orignal position
int next_count =
entries_right > 0 ? rnd.Uniform(std::min(entries_right, 10)) : 0;
for (int i = 0; i < next_count; i++) {
iter->Next();
data_iter--;
ASSERT_EQ(iter->key().ToString(), data_iter->first);
ASSERT_EQ(iter->value().ToString(), data_iter->second);
}
if (restore_position_with_seek) {
// Restore orignal position using Seek()
iter->Seek(seek_key);
for (int i = 0; i < next_count; i++) {
data_iter++;
}
ASSERT_EQ(iter->key().ToString(), data_iter->first);
ASSERT_EQ(iter->value().ToString(), data_iter->second);
} else {
// Restore original position using Prev()
for (int i = 0; i < next_count; i++) {
iter->Prev();
data_iter++;
ASSERT_EQ(iter->key().ToString(), data_iter->first);
ASSERT_EQ(iter->value().ToString(), data_iter->second);
}
}
entries_right++;
data_iter++;
}
ASSERT_EQ(data_iter, true_data.rend());
delete iter;
}
}
TEST_F(DBIteratorTest, IteratorWithLocalStatistics) {
Options options = CurrentOptions();
options.statistics = rocksdb::CreateDBStatistics();
@ -1310,6 +1526,76 @@ TEST_F(DBIteratorTest, IteratorWithLocalStatistics) {
ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), total_bytes);
}
TEST_F(DBIteratorTest, ReadAhead) {
Options options;
env_->count_random_reads_ = true;
options.env = env_;
options.disable_auto_compactions = true;
options.write_buffer_size = 4 << 20;
options.statistics = rocksdb::CreateDBStatistics();
BlockBasedTableOptions table_options;
table_options.block_size = 1024;
table_options.no_block_cache = true;
options.table_factory.reset(new BlockBasedTableFactory(table_options));
Reopen(options);
std::string value(1024, 'a');
for (int i = 0; i < 100; i++) {
Put(Key(i), value);
}
ASSERT_OK(Flush());
MoveFilesToLevel(2);
for (int i = 0; i < 100; i++) {
Put(Key(i), value);
}
ASSERT_OK(Flush());
MoveFilesToLevel(1);
for (int i = 0; i < 100; i++) {
Put(Key(i), value);
}
ASSERT_OK(Flush());
#ifndef ROCKSDB_LITE
ASSERT_EQ("1,1,1", FilesPerLevel());
#endif // !ROCKSDB_LITE
env_->random_read_bytes_counter_ = 0;
options.statistics->setTickerCount(NO_FILE_OPENS, 0);
ReadOptions read_options;
auto* iter = db_->NewIterator(read_options);
iter->SeekToFirst();
int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
size_t bytes_read = env_->random_read_bytes_counter_;
delete iter;
env_->random_read_bytes_counter_ = 0;
options.statistics->setTickerCount(NO_FILE_OPENS, 0);
read_options.readahead_size = 1024 * 10;
iter = db_->NewIterator(read_options);
iter->SeekToFirst();
int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS);
size_t bytes_read_readahead = env_->random_read_bytes_counter_;
delete iter;
ASSERT_EQ(num_file_opens + 3, num_file_opens_readahead);
ASSERT_GT(bytes_read_readahead, bytes_read);
ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3);
// Verify correctness.
iter = db_->NewIterator(read_options);
int count = 0;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ASSERT_EQ(value, iter->value());
count++;
}
ASSERT_EQ(100, count);
for (int i = 0; i < 100; i++) {
iter->Seek(Key(i));
ASSERT_EQ(value, iter->value());
}
delete iter;
}
} // namespace rocksdb
int main(int argc, char** argv) {

View File

@ -358,23 +358,30 @@ class SpecialEnv : public EnvWrapper {
class CountingFile : public RandomAccessFile {
public:
CountingFile(unique_ptr<RandomAccessFile>&& target,
anon::AtomicCounter* counter)
: target_(std::move(target)), counter_(counter) {}
anon::AtomicCounter* counter,
std::atomic<size_t>* bytes_read)
: target_(std::move(target)),
counter_(counter),
bytes_read_(bytes_read) {}
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
counter_->Increment();
return target_->Read(offset, n, result, scratch);
Status s = target_->Read(offset, n, result, scratch);
*bytes_read_ += result->size();
return s;
}
private:
unique_ptr<RandomAccessFile> target_;
anon::AtomicCounter* counter_;
std::atomic<size_t>* bytes_read_;
};
Status s = target()->NewRandomAccessFile(f, r, soptions);
random_file_open_counter_++;
if (s.ok() && count_random_reads_) {
r->reset(new CountingFile(std::move(*r), &random_read_counter_));
r->reset(new CountingFile(std::move(*r), &random_read_counter_,
&random_read_bytes_counter_));
}
return s;
}
@ -464,6 +471,7 @@ class SpecialEnv : public EnvWrapper {
bool count_random_reads_;
anon::AtomicCounter random_read_counter_;
std::atomic<size_t> random_read_bytes_counter_;
std::atomic<int> random_file_open_counter_;
bool count_sequential_reads_;

View File

@ -87,15 +87,16 @@ void TableCache::ReleaseHandle(Cache::Handle* handle) {
Status TableCache::GetTableReader(
const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const FileDescriptor& fd,
bool sequential_mode, bool record_read_stats, HistogramImpl* file_read_hist,
unique_ptr<TableReader>* table_reader, bool skip_filters, int level) {
bool sequential_mode, size_t readahead, bool record_read_stats,
HistogramImpl* file_read_hist, unique_ptr<TableReader>* table_reader,
bool skip_filters, int level) {
std::string fname =
TableFileName(ioptions_.db_paths, fd.GetNumber(), fd.GetPathId());
unique_ptr<RandomAccessFile> file;
Status s = ioptions_.env->NewRandomAccessFile(fname, &file, env_options);
if (sequential_mode && ioptions_.compaction_readahead_size > 0) {
file = NewReadaheadRandomAccessFile(std::move(file),
ioptions_.compaction_readahead_size);
if (readahead > 0) {
file = NewReadaheadRandomAccessFile(std::move(file), readahead);
}
RecordTick(ioptions_.statistics, NO_FILE_OPENS);
if (s.ok()) {
@ -143,8 +144,9 @@ Status TableCache::FindTable(const EnvOptions& env_options,
}
unique_ptr<TableReader> table_reader;
s = GetTableReader(env_options, internal_comparator, fd,
false /* sequential mode */, record_read_stats,
file_read_hist, &table_reader, skip_filters, level);
false /* sequential mode */, 0 /* readahead */,
record_read_stats, file_read_hist, &table_reader,
skip_filters, level);
if (!s.ok()) {
assert(table_reader == nullptr);
RecordTick(ioptions_.statistics, NO_FILE_ERRORS);
@ -175,13 +177,24 @@ InternalIterator* TableCache::NewIterator(
TableReader* table_reader = nullptr;
Cache::Handle* handle = nullptr;
bool create_new_table_reader =
(for_compaction && ioptions_.new_table_reader_for_compaction_inputs);
size_t readahead = 0;
bool create_new_table_reader = false;
if (for_compaction) {
if (ioptions_.new_table_reader_for_compaction_inputs) {
readahead = ioptions_.compaction_readahead_size;
create_new_table_reader = true;
}
} else {
readahead = options.readahead_size;
create_new_table_reader = readahead > 0;
}
if (create_new_table_reader) {
unique_ptr<TableReader> table_reader_unique_ptr;
Status s = GetTableReader(
env_options, icomparator, fd, /* sequential mode */ true,
/* record stats */ false, nullptr, &table_reader_unique_ptr,
env_options, icomparator, fd, true /* sequential_mode */, readahead,
!for_compaction /* record stats */, nullptr, &table_reader_unique_ptr,
false /* skip_filters */, level);
if (!s.ok()) {
return NewErrorInternalIterator(s, arena);

View File

@ -111,7 +111,8 @@ class TableCache {
Status GetTableReader(const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator,
const FileDescriptor& fd, bool sequential_mode,
bool record_read_stats, HistogramImpl* file_read_hist,
size_t readahead, bool record_read_stats,
HistogramImpl* file_read_hist,
unique_ptr<TableReader>* table_reader,
bool skip_filters = false, int level = -1);

View File

@ -824,6 +824,10 @@ struct DBOptions {
// Some functions that make it easier to optimize RocksDB
// Use this if your DB is very small (like under 1GB) and you don't want to
// spend lots of memory for memtables.
DBOptions* OptimizeForSmallDb();
#ifndef ROCKSDB_LITE
// By default, RocksDB uses only one background thread for flush and
// compaction. Calling this function will set it up such that total of
@ -898,7 +902,7 @@ struct DBOptions {
// If max_open_files is -1, DB will open all files on DB::Open(). You can
// use this option to increase the number of threads used to open the files.
// Default: 1
// Default: 16
int max_file_opening_threads;
// Once write-ahead logs exceed this size, we will start forcing the flush of
@ -1343,6 +1347,8 @@ struct Options : public DBOptions, public ColumnFamilyOptions {
void DumpCFOptions(Logger* log) const;
// Some functions that make it easier to optimize RocksDB
// Set appropriate parameters for bulk loading.
// The reason that this is a function that returns "this" instead of a
// constructor is to enable chaining of multiple similar calls in the future.
@ -1352,6 +1358,10 @@ struct Options : public DBOptions, public ColumnFamilyOptions {
// It's recommended to manually call CompactRange(NULL, NULL) before reading
// from the database, because otherwise the read can be very slow.
Options* PrepareForBulkLoad();
// Use this if your DB is very small (like under 1GB) and you don't want to
// spend lots of memory for memtables.
Options* OptimizeForSmallDb();
};
//
@ -1448,6 +1458,9 @@ struct ReadOptions {
// Enable a total order seek regardless of index format (e.g. hash index)
// used in the table. Some table format (e.g. plain table) may not support
// this option.
// If true when calling Get(), we also skip prefix bloom when reading from
// block based table. It provides a way to read exisiting data after
// changing implementation of prefix extractor.
bool total_order_seek;
// Enforce that the iterator only iterates over the same prefix as the seek.
@ -1466,6 +1479,12 @@ struct ReadOptions {
// Default: false
bool pin_data;
// If non-zero, NewIterator will create a new table reader which
// performs reads of the given size. Using a large size (> 2MB) can
// improve the performance of forward iteration on spinning disks.
// Default: 0
size_t readahead_size;
ReadOptions();
ReadOptions(bool cksum, bool cache);
};

View File

@ -5,7 +5,7 @@
#pragma once
#define ROCKSDB_MAJOR 4
#define ROCKSDB_MINOR 7
#define ROCKSDB_MINOR 8
#define ROCKSDB_PATCH 0
// Do not use these. We made the mistake of declaring macros starting with

View File

@ -262,8 +262,11 @@ class WinMmapFile : public WritableFile {
// page size or SSD page size
const size_t
allocation_granularity_; // View must start at such a granularity
size_t mapping_size_; // We want file mapping to be of a specific size
// because then the file is expandable
size_t reserved_size_; // Preallocated size
size_t mapping_size_; // The max size of the mapping object
// we want to guess the final file size to minimize the remapping
size_t view_size_; // How much memory to map into a view at a time
char* mapped_begin_; // Must begin at the file offset that is aligned with
@ -283,15 +286,6 @@ class WinMmapFile : public WritableFile {
return ftruncate(filename_, hFile_, toSize);
}
// Can only truncate or reserve to a sector size aligned if
// used on files that are opened with Unbuffered I/O
// Normally it does not present a problem since in memory mapped files
// we do not disable buffering
Status ReserveFileSpace(uint64_t toSize) {
IOSTATS_TIMER_GUARD(allocate_nanos);
return fallocate(filename_, hFile_, toSize);
}
Status UnmapCurrentRegion() {
Status status;
@ -301,82 +295,57 @@ class WinMmapFile : public WritableFile {
"Failed to unmap file view: " + filename_, GetLastError());
}
// UnmapView automatically sends data to disk but not the metadata
// which is good and provides some equivalent of fdatasync() on Linux
// therefore, we donot need separate flag for metadata
pending_sync_ = false;
mapped_begin_ = nullptr;
mapped_end_ = nullptr;
dst_ = nullptr;
last_sync_ = nullptr;
// Move on to the next portion of the file
file_offset_ += view_size_;
// Increase the amount we map the next time, but capped at 1MB
view_size_ *= 2;
view_size_ = std::min(view_size_, c_OneMB);
// UnmapView automatically sends data to disk but not the metadata
// which is good and provides some equivalent of fdatasync() on Linux
// therefore, we donot need separate flag for metadata
mapped_begin_ = nullptr;
mapped_end_ = nullptr;
dst_ = nullptr;
last_sync_ = nullptr;
pending_sync_ = false;
}
return status;
}
Status MapNewRegion() {
Status status;
assert(mapped_begin_ == nullptr);
size_t minMappingSize = file_offset_ + view_size_;
size_t minDiskSize = file_offset_ + view_size_;
// Check if we need to create a new mapping since we want to write beyond
// the current one
// If the mapping view is now too short
// CreateFileMapping will extend the size of the file automatically if the
// mapping size is greater than
// the current length of the file, which reserves the space and makes
// writing faster, except, windows can not map an empty file.
// Thus the first time around we must actually extend the file ourselves
if (hMap_ == NULL || minMappingSize > mapping_size_) {
if (NULL == hMap_) {
// Creating mapping for the first time so reserve the space on disk
status = ReserveFileSpace(minMappingSize);
if (!status.ok()) {
return status;
}
if (minDiskSize > reserved_size_) {
status = Allocate(file_offset_, view_size_);
if (!status.ok()) {
return status;
}
}
if (hMap_) {
// Need to remap
if (hMap_ == NULL || reserved_size_ > mapping_size_) {
if (hMap_ != NULL) {
// Unmap the previous one
BOOL ret = ::CloseHandle(hMap_);
assert(ret);
hMap_ = NULL;
}
// Calculate the new mapping size which will hopefully reserve space for
// several consecutive sliding views
// Query preallocation block size if set
size_t preallocationBlockSize = 0;
size_t lastAllocatedBlockSize = 0; // Not used
GetPreallocationStatus(&preallocationBlockSize, &lastAllocatedBlockSize);
if (preallocationBlockSize) {
preallocationBlockSize =
Roundup(preallocationBlockSize, allocation_granularity_);
} else {
preallocationBlockSize = 2 * view_size_;
}
mapping_size_ += preallocationBlockSize;
ULARGE_INTEGER mappingSize;
mappingSize.QuadPart = mapping_size_;
mappingSize.QuadPart = reserved_size_;
hMap_ = CreateFileMappingA(
hFile_,
NULL, // Security attributes
PAGE_READWRITE, // There is not a write only mode for mapping
mappingSize.HighPart, // Enable mapping the whole file but the actual
// amount mapped is determined by MapViewOfFile
// amount mapped is determined by MapViewOfFile
mappingSize.LowPart,
NULL); // Mapping name
@ -385,6 +354,8 @@ class WinMmapFile : public WritableFile {
"WindowsMmapFile failed to create file mapping for: " + filename_,
GetLastError());
}
mapping_size_ = reserved_size_;
}
ULARGE_INTEGER offset;
@ -416,6 +387,7 @@ class WinMmapFile : public WritableFile {
hMap_(NULL),
page_size_(page_size),
allocation_granularity_(allocation_granularity),
reserved_size_(0),
mapping_size_(0),
view_size_(0),
mapped_begin_(nullptr),
@ -435,25 +407,10 @@ class WinMmapFile : public WritableFile {
// Only for memory mapped writes
assert(options.use_mmap_writes);
// Make sure buffering is not disabled. It is ignored for mapping
// purposes but also imposes restriction on moving file position
// it is not a problem so much with reserving space since it is probably a
// factor
// of allocation_granularity but we also want to truncate the file in
// Close() at
// arbitrary position so we do not have to feel this with zeros.
assert(options.use_os_buffer);
// View size must be both the multiple of allocation_granularity AND the
// page size
if ((allocation_granularity_ % page_size_) == 0) {
view_size_ = 2 * allocation_granularity;
} else if ((page_size_ % allocation_granularity_) == 0) {
view_size_ = 2 * page_size_;
} else {
// we can multiply them together
assert(false);
}
// page size and the granularity is usually a multiple of a page size.
const size_t viewSize = 32 * 1024; // 32Kb similar to the Windows File Cache in buffered mode
view_size_ = Roundup(viewSize, allocation_granularity_);
}
~WinMmapFile() {
@ -479,14 +436,20 @@ class WinMmapFile : public WritableFile {
if (!s.ok()) {
return s;
}
} else {
size_t n = std::min(left, avail);
memcpy(dst_, src, n);
dst_ += n;
src += n;
left -= n;
pending_sync_ = true;
}
}
size_t n = std::min(left, avail);
memcpy(dst_, src, n);
dst_ += n;
src += n;
left -= n;
pending_sync_ = true;
// Now make sure that the last partial page is padded with zeros if needed
size_t bytesToPad = Roundup(size_t(dst_), page_size_) - size_t(dst_);
if (bytesToPad > 0) {
memset(dst_, 0, bytesToPad);
}
return Status::OK();
@ -508,7 +471,13 @@ class WinMmapFile : public WritableFile {
// which we use does not write zeros and it is good.
uint64_t targetSize = GetFileSize();
s = UnmapCurrentRegion();
if (mapped_begin_ != nullptr) {
// Sync before unmapping to make sure everything
// is on disk and there is not a lazy writing
// so we are deterministic with the tests
Sync();
s = UnmapCurrentRegion();
}
if (NULL != hMap_) {
BOOL ret = ::CloseHandle(hMap_);
@ -521,15 +490,18 @@ class WinMmapFile : public WritableFile {
hMap_ = NULL;
}
TruncateFile(targetSize);
if (hFile_ != NULL) {
BOOL ret = ::CloseHandle(hFile_);
hFile_ = NULL;
TruncateFile(targetSize);
if (!ret && s.ok()) {
auto lastError = GetLastError();
s = IOErrorFromWindowsError(
"Failed to close file map handle: " + filename_, lastError);
BOOL ret = ::CloseHandle(hFile_);
hFile_ = NULL;
if (!ret && s.ok()) {
auto lastError = GetLastError();
s = IOErrorFromWindowsError(
"Failed to close file map handle: " + filename_, lastError);
}
}
return s;
@ -542,7 +514,7 @@ class WinMmapFile : public WritableFile {
Status s;
// Some writes occurred since last sync
if (pending_sync_) {
if (dst_ > last_sync_) {
assert(mapped_begin_);
assert(dst_);
assert(dst_ > mapped_begin_);
@ -552,16 +524,15 @@ class WinMmapFile : public WritableFile {
TruncateToPageBoundary(page_size_, last_sync_ - mapped_begin_);
size_t page_end =
TruncateToPageBoundary(page_size_, dst_ - mapped_begin_ - 1);
last_sync_ = dst_;
// Flush only the amount of that is a multiple of pages
if (!::FlushViewOfFile(mapped_begin_ + page_begin,
(page_end - page_begin) + page_size_)) {
(page_end - page_begin) + page_size_)) {
s = IOErrorFromWindowsError("Failed to FlushViewOfFile: " + filename_,
GetLastError());
} else {
last_sync_ = dst_;
}
pending_sync_ = false;
}
return s;
@ -571,19 +542,15 @@ class WinMmapFile : public WritableFile {
* Flush data as well as metadata to stable storage.
*/
virtual Status Fsync() override {
Status s;
// Flush metadata if pending
const bool pending = pending_sync_;
s = Sync();
Status s = Sync();
// Flush metadata
if (s.ok() && pending) {
if (s.ok() && pending_sync_) {
if (!::FlushFileBuffers(hFile_)) {
s = IOErrorFromWindowsError("Failed to FlushFileBuffers: " + filename_,
GetLastError());
}
pending_sync_ = false;
}
return s;
@ -604,7 +571,24 @@ class WinMmapFile : public WritableFile {
}
virtual Status Allocate(uint64_t offset, uint64_t len) override {
return Status::OK();
Status status;
TEST_KILL_RANDOM("WinMmapFile::Allocate", rocksdb_kill_odds);
// Make sure that we reserve an aligned amount of space
// since the reservation block size is driven outside so we want
// to check if we are ok with reservation here
size_t spaceToReserve = Roundup(offset + len, view_size_);
// Nothing to do
if (spaceToReserve <= reserved_size_) {
return status;
}
IOSTATS_TIMER_GUARD(allocate_nanos);
status = fallocate(filename_, hFile_, spaceToReserve);
if (status.ok()) {
reserved_size_ = spaceToReserve;
}
return status;
}
};

View File

@ -1332,7 +1332,8 @@ InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options,
NewIndexIterator(read_options), arena);
}
bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter,
bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
FilterBlockReader* filter,
const Slice& internal_key) const {
if (filter == nullptr || filter->IsBlockBased()) {
return true;
@ -1341,7 +1342,7 @@ bool BlockBasedTable::FullFilterKeyMayMatch(FilterBlockReader* filter,
if (!filter->KeyMayMatch(user_key)) {
return false;
}
if (rep_->ioptions.prefix_extractor &&
if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor &&
rep_->ioptions.prefix_extractor->InDomain(user_key) &&
!filter->PrefixMayMatch(
rep_->ioptions.prefix_extractor->Transform(user_key))) {
@ -1361,7 +1362,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
// First check the full filter
// If full filter not useful, Then go into each block
if (!FullFilterKeyMayMatch(filter, key)) {
if (!FullFilterKeyMayMatch(read_options, filter, key)) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
} else {
BlockIter iiter;

View File

@ -207,7 +207,8 @@ class BlockBasedTable : public TableReader {
IndexReader** index_reader,
InternalIterator* preloaded_meta_index_iter = nullptr);
bool FullFilterKeyMayMatch(FilterBlockReader* filter,
bool FullFilterKeyMayMatch(const ReadOptions& read_options,
FilterBlockReader* filter,
const Slice& user_key) const;
// Read the meta block from sst.

View File

@ -34,6 +34,7 @@
#include "table/plain_table_factory.h"
#include "tools/ldb_cmd.h"
#include "util/random.h"
#include "util/compression.h"
#include "port/port.h"
@ -194,15 +195,19 @@ int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
};
for (auto& i : compressions) {
CompressionOptions compress_opt;
std::string column_family_name;
TableBuilderOptions tb_opts(imoptions, ikc, &block_based_table_factories,
i.first, compress_opt,
nullptr /* compression_dict */,
false /* skip_filters */, column_family_name);
uint64_t file_size = CalculateCompressedTableSize(tb_opts, block_size);
fprintf(stdout, "Compression: %s", i.second);
fprintf(stdout, " Size: %" PRIu64 "\n", file_size);
if (CompressionTypeSupported(i.first)) {
CompressionOptions compress_opt;
std::string column_family_name;
TableBuilderOptions tb_opts(imoptions, ikc, &block_based_table_factories,
i.first, compress_opt,
nullptr /* compression_dict */,
false /* skip_filters */, column_family_name);
uint64_t file_size = CalculateCompressedTableSize(tb_opts, block_size);
fprintf(stdout, "Compression: %s", i.second);
fprintf(stdout, " Size: %" PRIu64 "\n", file_size);
} else {
fprintf(stdout, "Unsupported compression type: %s.\n", i.second);
}
}
return 0;
}

View File

@ -668,6 +668,12 @@ Options::PrepareForBulkLoad()
return this;
}
Options* Options::OptimizeForSmallDb() {
ColumnFamilyOptions::OptimizeForSmallDb();
DBOptions::OptimizeForSmallDb();
return this;
}
Options* Options::OldDefaults(int rocksdb_major_version,
int rocksdb_minor_version) {
ColumnFamilyOptions::OldDefaults(rocksdb_major_version,
@ -705,6 +711,12 @@ ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults(
}
// Optimization functions
DBOptions* DBOptions::OptimizeForSmallDb() {
max_file_opening_threads = 1;
max_open_files = 5000;
return this;
}
ColumnFamilyOptions* ColumnFamilyOptions::OptimizeForSmallDb() {
write_buffer_size = 2 << 20;
target_file_size_base = 2 * 1048576;
@ -794,7 +806,8 @@ ReadOptions::ReadOptions()
managed(false),
total_order_seek(false),
prefix_same_as_start(false),
pin_data(false) {
pin_data(false),
readahead_size(0) {
XFUNC_TEST("", "managed_options", managed_options, xf_manage_options,
reinterpret_cast<ReadOptions*>(this));
}
@ -809,7 +822,8 @@ ReadOptions::ReadOptions(bool cksum, bool cache)
managed(false),
total_order_seek(false),
prefix_same_as_start(false),
pin_data(false) {
pin_data(false),
readahead_size(0) {
XFUNC_TEST("", "managed_options", managed_options, xf_manage_options,
reinterpret_cast<ReadOptions*>(this));
}

View File

@ -4,6 +4,7 @@
// of patent rights can be found in the PATENTS file in the same directory.
#include <math.h>
#include <cmath>
#include <algorithm>
#include "rocksdb/options.h"

View File

@ -806,20 +806,23 @@ Status ParseColumnFamilyOption(const std::string& name,
new_options->compression_opts.level =
ParseInt(value.substr(start, end - start));
start = end + 1;
end = value.find(':', start);
if (end == std::string::npos) {
return Status::InvalidArgument(
"unable to parse the specified CF option " + name);
}
new_options->compression_opts.strategy =
ParseInt(value.substr(start, value.size() - start));
start = end + 1;
if (start >= value.size()) {
return Status::InvalidArgument(
"unable to parse the specified CF option " + name);
}
new_options->compression_opts.max_dict_bytes =
end = value.find(':', start);
new_options->compression_opts.strategy =
ParseInt(value.substr(start, value.size() - start));
// max_dict_bytes is optional for backwards compatibility
if (end != std::string::npos) {
start = end + 1;
if (start >= value.size()) {
return Status::InvalidArgument(
"unable to parse the specified CF option " + name);
}
new_options->compression_opts.max_dict_bytes =
ParseInt(value.substr(start, value.size() - start));
}
} else if (name == "compaction_options_fifo") {
new_options->compaction_options_fifo.max_table_files_size =
ParseUint64(value);

View File

@ -600,9 +600,14 @@ TEST_F(OptionsTest, GetOptionsFromStringTest) {
base_options,
"write_buffer_size=10;max_write_buffer_number=16;"
"block_based_table_factory={block_cache=1M;block_size=4;};"
"create_if_missing=true;max_open_files=1;rate_limiter_bytes_per_sec=1024",
"compression_opts=4:5:6;create_if_missing=true;max_open_files=1;"
"rate_limiter_bytes_per_sec=1024",
&new_options));
ASSERT_EQ(new_options.compression_opts.window_bits, 4);
ASSERT_EQ(new_options.compression_opts.level, 5);
ASSERT_EQ(new_options.compression_opts.strategy, 6);
ASSERT_EQ(new_options.compression_opts.max_dict_bytes, 0);
ASSERT_EQ(new_options.write_buffer_size, 10U);
ASSERT_EQ(new_options.max_write_buffer_number, 16);
BlockBasedTableOptions new_block_based_table_options =
@ -1315,9 +1320,10 @@ TEST_F(OptionsParserTest, DifferentDefault) {
old_default_cf_opts.compaction_pri);
}
ColumnFamilyOptions cf_small_opts;
cf_small_opts.OptimizeForSmallDb();
ASSERT_EQ(2 << 20, cf_small_opts.write_buffer_size);
Options small_opts;
small_opts.OptimizeForSmallDb();
ASSERT_EQ(2 << 20, small_opts.write_buffer_size);
ASSERT_EQ(5000, small_opts.max_open_files);
}
class OptionsSanityCheckTest : public OptionsParserTest {

View File

@ -6,6 +6,7 @@
#include <assert.h>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <string>
#include <unordered_set>

View File

@ -10,6 +10,7 @@
#pragma once
#include <atomic>
#include <functional>
#include <memory>
#include <unordered_map>
#include <vector>