From 607628d349ace70d41286299766d0aa8087fc2b5 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 1 Sep 2016 15:28:40 -0700 Subject: [PATCH] Support ZSTD with finalized format Summary: ZSTD 1.0.0 is coming. We can finally add support for ZSTD without worrying about compatibility. ZSTDNotFinal is still kept for compatibility reasons. Test Plan: Run all tests. Run db_bench with ZSTD compression on RocksDB builds linked against ZSTD 1.0 and against older versions. Reviewers: andrewkr, IslamAbdelRahman Reviewed By: IslamAbdelRahman Subscribers: cyan, igor, IslamAbdelRahman, leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D63141 --- HISTORY.md | 1 + build_tools/dependencies.sh | 2 +- build_tools/dependencies_4.8.1.sh | 2 +- db/db_test.cc | 4 ++-- db/db_test2.cc | 6 +++--- include/rocksdb/options.h | 8 +++++++- table/block_based_table_builder.cc | 1 + table/format.cc | 1 + table/table_test.cc | 4 ++-- tools/db_bench_tool.cc | 6 +++--- tools/db_sanity_test.cc | 2 +- tools/db_stress.cc | 2 +- tools/ldb_cmd.cc | 2 +- tools/sst_dump_tool.cc | 19 +++++++++---------- util/compression.h | 11 +++++++++++ util/options_helper.h | 1 + util/options_test.cc | 6 ++++-- utilities/column_aware_encoding_exp.cc | 3 +-- 18 files changed, 51 insertions(+), 30 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index ad7b4e300..e6726ba31 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,6 +4,7 @@ * CancelAllBackgroundWork() flushes all memtables for databases containing writes that have bypassed the WAL (writes issued with WriteOptions::disableWAL=true) before shutting down background threads. * Merge options source_compaction_factor, max_grandparent_overlap_bytes and expanded_compaction_factor into max_compaction_bytes. * Remove ImmutableCFOptions. +* Add a compression type ZSTD, which can work with ZSTD 0.8.0 or up. Still keep ZSTDNotFinal for compatibility reasons. ### New Features * Introduce NewClockCache, which is based on CLOCK algorithm with better concurrent performance in some cases. 
It can be used to replace the default LRU-based block cache and table cache. To use it, RocksDB need to be linked with TBB lib. diff --git a/build_tools/dependencies.sh b/build_tools/dependencies.sh index 829903da0..9d198591d 100644 --- a/build_tools/dependencies.sh +++ b/build_tools/dependencies.sh @@ -6,7 +6,7 @@ SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/8c38a4c1e52b4c2cc8a9cdc31b9c947ed7dbfc ZLIB_BASE=/mnt/gvfs/third-party2/zlib/0882df3713c7a84f15abe368dc004581f20b39d7/1.2.8/gcc-5-glibc-2.23/9bc6787 BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/740325875f6729f42d28deaa2147b0854f3a347e/1.0.6/gcc-5-glibc-2.23/9bc6787 LZ4_BASE=/mnt/gvfs/third-party2/lz4/0e790b441e2d9acd68d51e1d2e028f88c6a79ddf/r131/gcc-5-glibc-2.23/9bc6787 -ZSTD_BASE=/mnt/gvfs/third-party2/zstd/bd13f49da8633897105b2b8541b106d487ce54ca/0.7.5/gcc-5-glibc-2.23/9bc6787 +ZSTD_BASE=/mnt/gvfs/third-party2/zstd/9455f75ff7f4831dc9fda02a6a0f8c68922fad8f/1.0.0/gcc-5-glibc-2.23/9bc6787 GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/f001a51b2854957676d07306ef3abf67186b5c8b/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/fc8a13ca1fffa4d0765c716c5a0b49f0c107518f/master/gcc-5-glibc-2.23/1c32b4b NUMA_BASE=/mnt/gvfs/third-party2/numa/17c514c4d102a25ca15f4558be564eeed76f4b6a/2.0.8/gcc-5-glibc-2.23/9bc6787 diff --git a/build_tools/dependencies_4.8.1.sh b/build_tools/dependencies_4.8.1.sh index 50d4b3a72..118b042d1 100644 --- a/build_tools/dependencies_4.8.1.sh +++ b/build_tools/dependencies_4.8.1.sh @@ -6,7 +6,7 @@ SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/8c38a4c1e52b4c2cc8a9cdc31b9c947ed7dbfc ZLIB_BASE=/mnt/gvfs/third-party2/zlib/0882df3713c7a84f15abe368dc004581f20b39d7/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/740325875f6729f42d28deaa2147b0854f3a347e/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a LZ4_BASE=/mnt/gvfs/third-party2/lz4/0e790b441e2d9acd68d51e1d2e028f88c6a79ddf/r131/gcc-4.8.1-glibc-2.17/c3f970a 
-ZSTD_BASE=/mnt/gvfs/third-party2/zstd/bd13f49da8633897105b2b8541b106d487ce54ca/0.7.5/gcc-4.8.1-glibc-2.17/c3f970a +ZSTD_BASE=/mnt/gvfs/third-party2/zstd/9455f75ff7f4831dc9fda02a6a0f8c68922fad8f/1.0.0/gcc-4.8.1-glibc-2.17/c3f970a GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/f001a51b2854957676d07306ef3abf67186b5c8b/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/fc8a13ca1fffa4d0765c716c5a0b49f0c107518f/master/gcc-4.8.1-glibc-2.17/8d31e51 NUMA_BASE=/mnt/gvfs/third-party2/numa/17c514c4d102a25ca15f4558be564eeed76f4b6a/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a diff --git a/db/db_test.cc b/db/db_test.cc index f7a5072fa..f766c0804 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1193,7 +1193,7 @@ bool MinLevelToCompress(CompressionType& type, Options& options, int wbits, type = kXpressCompression; fprintf(stderr, "using xpress\n"); } else if (ZSTD_Supported()) { - type = kZSTDNotFinalCompression; + type = kZSTD; fprintf(stderr, "using ZSTD\n"); } else { fprintf(stderr, "skipping test, compression disabled\n"); @@ -4708,7 +4708,7 @@ TEST_F(DBTest, CompressionStatsTest) { type = kXpressCompression; fprintf(stderr, "using xpress\n"); } else if (ZSTD_Supported()) { - type = kZSTDNotFinalCompression; + type = kZSTD; fprintf(stderr, "using ZSTD\n"); } else { fprintf(stderr, "skipping test, compression disabled\n"); diff --git a/db/db_test2.cc b/db/db_test2.cc index 24ed982a9..95383daa8 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -961,9 +961,9 @@ TEST_F(DBTest2, PresetCompressionDict) { compression_types.push_back(kLZ4Compression); compression_types.push_back(kLZ4HCCompression); #endif // LZ4_VERSION_NUMBER >= 10400 -#if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ - compression_types.push_back(kZSTDNotFinalCompression); -#endif // ZSTD_VERSION_NUMBER >= 500 + if (ZSTD_Supported()) { + compression_types.push_back(kZSTD); + } for (auto compression_type : compression_types) { options.compression = compression_type; diff --git a/include/rocksdb/options.h 
b/include/rocksdb/options.h index 93b996d5d..2bb76a95c 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -63,7 +63,13 @@ enum CompressionType : unsigned char { kLZ4Compression = 0x4, kLZ4HCCompression = 0x5, kXpressCompression = 0x6, - // zstd format is not finalized yet so it's subject to changes. + kZSTD = 0x7, + + // Only use kZSTDNotFinalCompression if you have to use ZSTD lib older than + // 0.8.0 or consider a possibility of downgrading the service or copying + // the database files to another service running with an older version of + // RocksDB that doesn't have kZSTD. Otherwise, you should use kZSTD. We will + // eventually remove the option from the public API. kZSTDNotFinalCompression = 0x40, // kDisableCompressionOption is used to disable some compression options. diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index f042134b4..f0779f9ca 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -377,6 +377,7 @@ Slice CompressBlock(const Slice& raw, return *compressed_output; } break; + case kZSTD: case kZSTDNotFinalCompression: if (ZSTD_Compress(compression_options, raw.data(), raw.size(), compressed_output, compression_dict) && diff --git a/table/format.cc b/table/format.cc index 3675bbade..8b558df0d 100644 --- a/table/format.cc +++ b/table/format.cc @@ -498,6 +498,7 @@ Status UncompressBlockContentsForCompressionType( *contents = BlockContents(std::move(ubuf), decompress_size, true, kNoCompression); break; + case kZSTD: case kZSTDNotFinalCompression: ubuf.reset(ZSTD_Uncompress(data, n, &decompress_size, compression_dict)); if (!ubuf) { diff --git a/table/table_test.cc b/table/table_test.cc index 16f46fa65..de885e2a8 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -588,8 +588,8 @@ static std::vector GenerateArgList() { compression_types.emplace_back(kXpressCompression, true); } if (ZSTD_Supported()) { - 
compression_types.emplace_back(kZSTDNotFinalCompression, false); - compression_types.emplace_back(kZSTDNotFinalCompression, true); + compression_types.emplace_back(kZSTD, false); + compression_types.emplace_back(kZSTD, true); } for (auto test_type : test_types) { diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index a3e1fd0f8..41e2b0226 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -597,7 +597,7 @@ enum rocksdb::CompressionType StringToCompressionType(const char* ctype) { else if (!strcasecmp(ctype, "xpress")) return rocksdb::kXpressCompression; else if (!strcasecmp(ctype, "zstd")) - return rocksdb::kZSTDNotFinalCompression; + return rocksdb::kZSTD; fprintf(stdout, "Cannot parse compression type '%s'\n", ctype); return rocksdb::kSnappyCompression; // default value @@ -1771,7 +1771,7 @@ class Benchmark { ok = XPRESS_Compress(input.data(), input.size(), compressed); break; - case rocksdb::kZSTDNotFinalCompression: + case rocksdb::kZSTD: ok = ZSTD_Compress(Options().compression_opts, input.data(), input.size(), compressed); break; @@ -2606,7 +2606,7 @@ class Benchmark { &decompress_size); ok = uncompressed != nullptr; break; - case rocksdb::kZSTDNotFinalCompression: + case rocksdb::kZSTD: uncompressed = ZSTD_Uncompress(compressed.data(), compressed.size(), &decompress_size); ok = uncompressed != nullptr; diff --git a/tools/db_sanity_test.cc b/tools/db_sanity_test.cc index 84d459993..2061f6e94 100644 --- a/tools/db_sanity_test.cc +++ b/tools/db_sanity_test.cc @@ -186,7 +186,7 @@ class SanityTestZSTDCompression : public SanityTest { public: explicit SanityTestZSTDCompression(const std::string& path) : SanityTest(path) { - options_.compression = kZSTDNotFinalCompression; + options_.compression = kZSTD; } virtual Options GetOptions() const override { return options_; } virtual std::string Name() const override { return "ZSTDCompression"; } diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 0dd6dee43..d3e7d0c93 100644 --- 
a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -375,7 +375,7 @@ enum rocksdb::CompressionType StringToCompressionType(const char* ctype) { else if (!strcasecmp(ctype, "xpress")) return rocksdb::kXpressCompression; else if (!strcasecmp(ctype, "zstd")) - return rocksdb::kZSTDNotFinalCompression; + return rocksdb::kZSTD; fprintf(stdout, "Cannot parse compression type '%s'\n", ctype); return rocksdb::kSnappyCompression; //default value diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index b56eb4c34..ccd1972ff 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -509,7 +509,7 @@ Options LDBCommand::PrepareOptionsForOpenDB() { } else if (comp == "xpress") { opt.compression = kXpressCompression; } else if (comp == "zstd") { - opt.compression = kZSTDNotFinalCompression; + opt.compression = kZSTD; } else { // Unknown compression. exec_state_ = diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index a2f8d08d9..01e3066c4 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -185,16 +185,15 @@ int SstFileReader::ShowAllCompressionSizes(size_t block_size) { fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size); - std::pair<CompressionType, const char*> compressions[] = { - { CompressionType::kNoCompression, "kNoCompression" }, - { CompressionType::kSnappyCompression, "kSnappyCompression" }, - { CompressionType::kZlibCompression, "kZlibCompression" }, - { CompressionType::kBZip2Compression, "kBZip2Compression" }, - { CompressionType::kLZ4Compression, "kLZ4Compression" }, - { CompressionType::kLZ4HCCompression, "kLZ4HCCompression" }, - { CompressionType::kXpressCompression, "kXpressCompression" }, - { CompressionType::kZSTDNotFinalCompression, "kZSTDNotFinalCompression" } - }; + std::pair<CompressionType, const char*> compressions[] = { + {CompressionType::kNoCompression, "kNoCompression"}, + {CompressionType::kSnappyCompression, "kSnappyCompression"}, + {CompressionType::kZlibCompression, "kZlibCompression"}, + {CompressionType::kBZip2Compression, "kBZip2Compression"}, + 
{CompressionType::kLZ4Compression, "kLZ4Compression"}, + {CompressionType::kLZ4HCCompression, "kLZ4HCCompression"}, + {CompressionType::kXpressCompression, "kXpressCompression"}, + {CompressionType::kZSTD, "kZSTD"}}; for (auto& i : compressions) { if (CompressionTypeSupported(i.first)) { diff --git a/util/compression.h b/util/compression.h index 6a0e28665..6c233e0f2 100644 --- a/util/compression.h +++ b/util/compression.h @@ -79,6 +79,14 @@ inline bool XPRESS_Supported() { } inline bool ZSTD_Supported() { +#ifdef ZSTD + // ZSTD format is finalized since version 0.8.0. + return (ZSTD_versionNumber() >= 800); +#endif + return false; +} + +inline bool ZSTDNotFinal_Supported() { #ifdef ZSTD return true; #endif @@ -102,6 +110,8 @@ inline bool CompressionTypeSupported(CompressionType compression_type) { case kXpressCompression: return XPRESS_Supported(); case kZSTDNotFinalCompression: + return ZSTDNotFinal_Supported(); + case kZSTD: return ZSTD_Supported(); default: assert(false); @@ -125,6 +135,7 @@ inline std::string CompressionTypeToString(CompressionType compression_type) { return "LZ4HC"; case kXpressCompression: return "Xpress"; + case kZSTD: case kZSTDNotFinalCompression: return "ZSTD"; default: diff --git a/util/options_helper.h b/util/options_helper.h index f0b9e9b97..fce3eae5d 100644 --- a/util/options_helper.h +++ b/util/options_helper.h @@ -598,6 +598,7 @@ static std::unordered_map {"kLZ4Compression", kLZ4Compression}, {"kLZ4HCCompression", kLZ4HCCompression}, {"kXpressCompression", kXpressCompression}, + {"kZSTD", kZSTD}, {"kZSTDNotFinalCompression", kZSTDNotFinalCompression}, {"kDisableCompressionOption", kDisableCompressionOption}}; diff --git a/util/options_test.cc b/util/options_test.cc index ce58af1fd..8edb40f4a 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -56,6 +56,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { "kLZ4Compression:" "kLZ4HCCompression:" "kXpressCompression:" + "kZSTD:" "kZSTDNotFinalCompression"}, 
{"bottommost_compression", "kLZ4Compression"}, {"compression_opts", "4:5:6:7"}, @@ -140,7 +141,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.min_write_buffer_number_to_merge, 3); ASSERT_EQ(new_cf_opt.max_write_buffer_number_to_maintain, 99); ASSERT_EQ(new_cf_opt.compression, kSnappyCompression); - ASSERT_EQ(new_cf_opt.compression_per_level.size(), 8U); + ASSERT_EQ(new_cf_opt.compression_per_level.size(), 9U); ASSERT_EQ(new_cf_opt.compression_per_level[0], kNoCompression); ASSERT_EQ(new_cf_opt.compression_per_level[1], kSnappyCompression); ASSERT_EQ(new_cf_opt.compression_per_level[2], kZlibCompression); @@ -148,7 +149,8 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.compression_per_level[4], kLZ4Compression); ASSERT_EQ(new_cf_opt.compression_per_level[5], kLZ4HCCompression); ASSERT_EQ(new_cf_opt.compression_per_level[6], kXpressCompression); - ASSERT_EQ(new_cf_opt.compression_per_level[7], kZSTDNotFinalCompression); + ASSERT_EQ(new_cf_opt.compression_per_level[7], kZSTD); + ASSERT_EQ(new_cf_opt.compression_per_level[8], kZSTDNotFinalCompression); ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 4); ASSERT_EQ(new_cf_opt.compression_opts.level, 5); ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6); diff --git a/utilities/column_aware_encoding_exp.cc b/utilities/column_aware_encoding_exp.cc index 74a5b5b9c..83a2bdb9c 100644 --- a/utilities/column_aware_encoding_exp.cc +++ b/utilities/column_aware_encoding_exp.cc @@ -80,8 +80,7 @@ class ColumnAwareEncodingExp { std::unordered_map<std::string, CompressionType> compressions = { {"kNoCompression", CompressionType::kNoCompression}, {"kZlibCompression", CompressionType::kZlibCompression}, - {"kZSTDNotFinalCompression", - CompressionType::kZSTDNotFinalCompression}}; + {"kZSTD", CompressionType::kZSTD}}; // Find Compression CompressionType compression_type = compressions[FLAGS_compression_type];