diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 85c1db059..b7eaff37d 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -44,15 +44,15 @@ using std::shared_ptr; enum CompressionType : char { // NOTE: do not change the values of existing entries, as these are // part of the persistent format on disk. - kNoCompression = 0x0, + kNoCompression = 0x0, kSnappyCompression = 0x1, kZlibCompression = 0x2, kBZip2Compression = 0x3 }; enum CompactionStyle : char { - kCompactionStyleLevel = 0x0, // level based compaction style - kCompactionStyleUniversal = 0x1 // Universal compaction style + kCompactionStyleLevel = 0x0, // level based compaction style + kCompactionStyleUniversal = 0x1 // Universal compaction style }; // Compression options for different compression algorithms like Zlib @@ -60,12 +60,9 @@ struct CompressionOptions { int window_bits; int level; int strategy; - CompressionOptions():window_bits(-14), - level(-1), - strategy(0){} - CompressionOptions(int wbits, int lev, int strategy):window_bits(wbits), - level(lev), - strategy(strategy){} + CompressionOptions() : window_bits(-14), level(-1), strategy(0) {} + CompressionOptions(int wbits, int lev, int strategy) + : window_bits(wbits), level(lev), strategy(strategy) {} }; // Options to control the behavior of a database (passed to DB::Open) @@ -216,7 +213,6 @@ struct Options { // Default: 16 int block_restart_interval; - // Compress blocks using the specified compression algorithm. This // parameter can be changed dynamically. // @@ -247,7 +243,7 @@ struct Options { // java/C api hard to construct. std::vector compression_per_level; - //different options for compression algorithms + // different options for compression algorithms CompressionOptions compression_opts; // If non-nullptr, use the specified filter policy to reduce disk reads. 
@@ -326,7 +322,6 @@ struct Options { // will be 20MB, total file size for level-2 will be 200MB, // and total file size for level-3 will be 2GB. - // by default 'max_bytes_for_level_base' is 10MB. uint64_t max_bytes_for_level_base; // by default 'max_bytes_for_level_base' is 10. @@ -484,10 +479,19 @@ struct Options { // order. int table_cache_remove_scan_count_limit; - // size of one block in arena memory allocation. - // If <= 0, a proper value is automatically calculated (usually 1/10 of + // Size of one block in arena memory allocation. + // + // If <= 0, a proper value is automatically calculated (usually about 1/10 of // writer_buffer_size). + // + // There are two additional restrictions on the specified size: + // (1) size should be in the range of [4096, 2 << 30] and + // (2) be the multiple of the CPU word (which helps with the memory + // alignment). + // + // We'll automatically check and adjust the size number to make sure it + // conforms to the restrictions. + // // Default: 0 size_t arena_block_size; @@ -572,7 +576,12 @@ struct Options { // Specify the file access pattern once a compaction is started. // It will be applied to all input files of a compaction. // Default: NORMAL - enum { NONE, NORMAL, SEQUENTIAL, WILLNEED } access_hint_on_compaction_start; + enum { + NONE, + NORMAL, + SEQUENTIAL, + WILLNEED + } access_hint_on_compaction_start; // Use adaptive mutex, which spins in the user space before resorting // to kernel. This could reduce context switch when the mutex is not @@ -622,7 +631,7 @@ struct Options { // Default: emtpy vector -- no user-defined statistics collection will be // performed. std::vector> - table_properties_collectors; + table_properties_collectors; // Allows thread-safe inplace updates. Requires Updates iff // * key exists in current memtable @@ -644,7 +653,7 @@ struct Options { // the block cache. It will not page in data from the OS cache or data that // resides in storage. 
enum ReadTier { - kReadAllTier = 0x0, // data in memtable, block cache, OS cache or storage + kReadAllTier = 0x0, // data in memtable, block cache, OS cache or storage kBlockCacheTier = 0x1 // data in memtable or block cache }; @@ -697,13 +706,14 @@ struct ReadOptions { prefix_seek(false), snapshot(nullptr), prefix(nullptr), - read_tier(kReadAllTier) { - } - ReadOptions(bool cksum, bool cache) : - verify_checksums(cksum), fill_cache(cache), - prefix_seek(false), snapshot(nullptr), prefix(nullptr), - read_tier(kReadAllTier) { - } + read_tier(kReadAllTier) {} + ReadOptions(bool cksum, bool cache) + : verify_checksums(cksum), + fill_cache(cache), + prefix_seek(false), + snapshot(nullptr), + prefix(nullptr), + read_tier(kReadAllTier) {} }; // Options that control write operations @@ -730,10 +740,7 @@ struct WriteOptions { // and the write may got lost after a crash. bool disableWAL; - WriteOptions() - : sync(false), - disableWAL(false) { - } + WriteOptions() : sync(false), disableWAL(false) {} }; // Options that control flush operations @@ -742,9 +749,7 @@ struct FlushOptions { // Default: true bool wait; - FlushOptions() - : wait(true) { - } + FlushOptions() : wait(true) {} }; } // namespace rocksdb diff --git a/util/arena_impl.cc b/util/arena_impl.cc index d5c2a537e..5125e2364 100644 --- a/util/arena_impl.cc +++ b/util/arena_impl.cc @@ -8,71 +8,86 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. 
#include "util/arena_impl.h" +#include namespace rocksdb { -ArenaImpl::ArenaImpl(size_t block_size) { - if (block_size < kMinBlockSize) { - block_size_ = kMinBlockSize; - } else if (block_size > kMaxBlockSize) { - block_size_ = kMaxBlockSize; - } else { - block_size_ = block_size; +const size_t ArenaImpl::kMinBlockSize = 4096; +const size_t ArenaImpl::kMaxBlockSize = 2 << 30; +static const int kAlignUnit = sizeof(void*); + +size_t OptimizeBlockSize(size_t block_size) { + // Make sure block_size is in optimal range + block_size = std::max(ArenaImpl::kMinBlockSize, block_size); + block_size = std::min(ArenaImpl::kMaxBlockSize, block_size); + + // make sure block_size is the multiple of kAlignUnit + if (block_size % kAlignUnit != 0) { + block_size = (1 + block_size / kAlignUnit) * kAlignUnit; } - blocks_memory_ = 0; - alloc_ptr_ = nullptr; // First allocation will allocate a block - alloc_bytes_remaining_ = 0; + return block_size; +} + +ArenaImpl::ArenaImpl(size_t block_size) + : kBlockSize(OptimizeBlockSize(block_size)) { + assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize && + kBlockSize % kAlignUnit == 0); } ArenaImpl::~ArenaImpl() { - for (size_t i = 0; i < blocks_.size(); i++) { - delete[] blocks_[i]; + for (const auto& block : blocks_) { + delete[] block; } } -char* ArenaImpl::AllocateFallback(size_t bytes) { - if (bytes > block_size_ / 4) { +char* ArenaImpl::AllocateFallback(size_t bytes, bool aligned) { + if (bytes > kBlockSize / 4) { // Object is more than a quarter of our block size. Allocate it separately // to avoid wasting too much space in leftover bytes. - char* result = AllocateNewBlock(bytes); - return result; + return AllocateNewBlock(bytes); } // We waste the remaining space in the current block. 
- alloc_ptr_ = AllocateNewBlock(block_size_); - alloc_bytes_remaining_ = block_size_; + auto block_head = AllocateNewBlock(kBlockSize); + alloc_bytes_remaining_ = kBlockSize - bytes; - char* result = alloc_ptr_; - alloc_ptr_ += bytes; - alloc_bytes_remaining_ -= bytes; - return result; + if (aligned) { + aligned_alloc_ptr_ = block_head + bytes; + unaligned_alloc_ptr_ = block_head + kBlockSize; + return block_head; + } else { + aligned_alloc_ptr_ = block_head; + unaligned_alloc_ptr_ = block_head + kBlockSize - bytes; + return unaligned_alloc_ptr_; + } } char* ArenaImpl::AllocateAligned(size_t bytes) { - const int align = sizeof(void*); // We'll align to pointer size - assert((align & (align-1)) == 0); // Pointer size should be a power of 2 - size_t current_mod = reinterpret_cast(alloc_ptr_) & (align-1); - size_t slop = (current_mod == 0 ? 0 : align - current_mod); + assert((kAlignUnit & (kAlignUnit - 1)) == + 0); // Pointer size should be a power of 2 + size_t current_mod = + reinterpret_cast(aligned_alloc_ptr_) & (kAlignUnit - 1); + size_t slop = (current_mod == 0 ? 
0 : kAlignUnit - current_mod); size_t needed = bytes + slop; char* result; if (needed <= alloc_bytes_remaining_) { - result = alloc_ptr_ + slop; - alloc_ptr_ += needed; + result = aligned_alloc_ptr_ + slop; + aligned_alloc_ptr_ += needed; alloc_bytes_remaining_ -= needed; } else { // AllocateFallback always returned aligned memory - result = AllocateFallback(bytes); + result = AllocateFallback(bytes, true /* aligned */); } - assert((reinterpret_cast(result) & (align-1)) == 0); + assert((reinterpret_cast(result) & (kAlignUnit - 1)) == 0); return result; } char* ArenaImpl::AllocateNewBlock(size_t block_bytes) { - char* result = new char[block_bytes]; + char* block = new char[block_bytes]; blocks_memory_ += block_bytes; - blocks_.push_back(result); - return result; + blocks_.push_back(block); + return block; } } // namespace rocksdb diff --git a/util/arena_impl.h b/util/arena_impl.h index b5a684247..538385ccc 100644 --- a/util/arena_impl.h +++ b/util/arena_impl.h @@ -22,49 +22,54 @@ namespace rocksdb { class ArenaImpl : public Arena { public: + // No copying allowed + ArenaImpl(const ArenaImpl&) = delete; + void operator=(const ArenaImpl&) = delete; + + static const size_t kMinBlockSize; + static const size_t kMaxBlockSize; + explicit ArenaImpl(size_t block_size = kMinBlockSize); virtual ~ArenaImpl(); - virtual char* Allocate(size_t bytes); + virtual char* Allocate(size_t bytes) override; - virtual char* AllocateAligned(size_t bytes); + virtual char* AllocateAligned(size_t bytes) override; // Returns an estimate of the total memory usage of data allocated - // by the arena (including space allocated but not yet used for user + // by the arena (exclude the space allocated but not yet used for future // allocations). - // - // TODO: Do we need to exclude space allocated but not used? 
virtual const size_t ApproximateMemoryUsage() { - return blocks_memory_ + blocks_.capacity() * sizeof(char*); + return blocks_memory_ + blocks_.capacity() * sizeof(char*) - + alloc_bytes_remaining_; } - virtual const size_t MemoryAllocatedBytes() { + virtual const size_t MemoryAllocatedBytes() override { return blocks_memory_; } private: - char* AllocateFallback(size_t bytes); + // Number of bytes allocated in one block + const size_t kBlockSize; + // Array of new[] allocated memory blocks + typedef std::vector Blocks; + Blocks blocks_; + + // Stats for current active block. + // For each block, we allocate aligned memory chunks from one end and + // allocate unaligned memory chunks from the other end. Otherwise the + // memory waste for alignment will be higher if we allocate both types of + // memory from one direction. + char* unaligned_alloc_ptr_ = nullptr; + char* aligned_alloc_ptr_ = nullptr; + // How many bytes left in currently active block? + size_t alloc_bytes_remaining_ = 0; + + char* AllocateFallback(size_t bytes, bool aligned); char* AllocateNewBlock(size_t block_bytes); - static const size_t kMinBlockSize = 4096; - static const size_t kMaxBlockSize = 2 << 30; - - // Number of bytes allocated in one block - size_t block_size_; - - // Allocation state - char* alloc_ptr_; - size_t alloc_bytes_remaining_; - - // Array of new[] allocated memory blocks - std::vector blocks_; - // Bytes of memory in blocks allocated so far - size_t blocks_memory_; - - // No copying allowed - ArenaImpl(const ArenaImpl&); - void operator=(const ArenaImpl&); + size_t blocks_memory_ = 0; }; inline char* ArenaImpl::Allocate(size_t bytes) { @@ -73,12 +78,16 @@ inline char* ArenaImpl::Allocate(size_t bytes) { // them for our internal use). 
assert(bytes > 0); if (bytes <= alloc_bytes_remaining_) { - char* result = alloc_ptr_; - alloc_ptr_ += bytes; + unaligned_alloc_ptr_ -= bytes; alloc_bytes_remaining_ -= bytes; - return result; + return unaligned_alloc_ptr_; } - return AllocateFallback(bytes); + return AllocateFallback(bytes, false /* unaligned */); } +// check and adjust the block_size so that the return value is +// 1. in the range of [kMinBlockSize, kMaxBlockSize]. +// 2. the multiple of align unit. +extern size_t OptimizeBlockSize(size_t block_size); + } // namespace rocksdb diff --git a/util/arena_test.cc b/util/arena_test.cc index 12aa7f7fe..4a3d1bd43 100644 --- a/util/arena_test.cc +++ b/util/arena_test.cc @@ -57,8 +57,33 @@ TEST(ArenaImplTest, MemoryAllocatedBytes) { ASSERT_EQ(arena_impl.MemoryAllocatedBytes(), expected_memory_allocated); } +// Make sure we didn't count the allocate but not used memory space in +// Arena::ApproximateMemoryUsage() +TEST(ArenaImplTest, ApproximateMemoryUsageTest) { + const size_t kBlockSize = 4096; + const size_t kEntrySize = kBlockSize / 8; + ArenaImpl arena(kBlockSize); + ASSERT_EQ(0, arena.ApproximateMemoryUsage()); + + auto num_blocks = kBlockSize / kEntrySize; + + // first allocation + arena.AllocateAligned(kEntrySize); + auto mem_usage = arena.MemoryAllocatedBytes(); + ASSERT_EQ(mem_usage, kBlockSize); + auto usage = arena.ApproximateMemoryUsage(); + ASSERT_LT(usage, mem_usage); + for (size_t i = 1; i < num_blocks; ++i) { + arena.AllocateAligned(kEntrySize); + ASSERT_EQ(mem_usage, arena.MemoryAllocatedBytes()); + ASSERT_EQ(arena.ApproximateMemoryUsage(), usage + kEntrySize); + usage = arena.ApproximateMemoryUsage(); + } + ASSERT_GT(usage, mem_usage); +} + TEST(ArenaImplTest, Simple) { - std::vector > allocated; + std::vector> allocated; ArenaImpl arena_impl; const int N = 100000; size_t bytes = 0; @@ -68,8 +93,9 @@ TEST(ArenaImplTest, Simple) { if (i % (N / 10) == 0) { s = i; } else { - s = rnd.OneIn(4000) ? rnd.Uniform(6000) : - (rnd.OneIn(10) ? 
rnd.Uniform(100) : rnd.Uniform(20)); + s = rnd.OneIn(4000) + ? rnd.Uniform(6000) + : (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20)); } if (s == 0) { // Our arena disallows size 0 allocations. @@ -89,7 +115,7 @@ TEST(ArenaImplTest, Simple) { bytes += s; allocated.push_back(std::make_pair(s, r)); ASSERT_GE(arena_impl.ApproximateMemoryUsage(), bytes); - if (i > N/10) { + if (i > N / 10) { ASSERT_LE(arena_impl.ApproximateMemoryUsage(), bytes * 1.10); } }