diff --git a/HISTORY.md b/HISTORY.md index 7ab4a704d..d0c6b17e6 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,6 +6,7 @@ ### Public API changes * DBOptions.db_paths now is a vector of a DBPath structure which indicates both of path and target size +* NewPlainTableFactory now accepts a PlainTableOptions struct, defined in include/rocksdb/table.h, instead of a list of individual parameters ## 3.3.0 (7/10/2014) diff --git a/db/c.cc b/db/c.cc index 57828550e..a1f7a8b0d 100644 --- a/db/c.cc +++ b/db/c.cc @@ -1428,9 +1428,13 @@ void rocksdb_options_set_plain_table_factory( double hash_table_ratio, size_t index_sparseness) { static rocksdb::TableFactory* factory = 0; if (!factory) { - factory = rocksdb::NewPlainTableFactory( - user_key_len, bloom_bits_per_key, - hash_table_ratio, index_sparseness); + rocksdb::PlainTableOptions options; + options.user_key_len = user_key_len; + options.bloom_bits_per_key = bloom_bits_per_key; + options.hash_table_ratio = hash_table_ratio; + options.index_sparseness = index_sparseness; + + factory = rocksdb::NewPlainTableFactory(options); } opt->rep.table_factory.reset(factory); } diff --git a/db/db_bench.cc b/db/db_bench.cc index af83487a1..86f6c640f 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -1688,8 +1688,13 @@ class Benchmark { if (bloom_bits_per_key < 0) { bloom_bits_per_key = 0; } - options.table_factory.reset( - NewPlainTableFactory(FLAGS_key_size, bloom_bits_per_key, 0.75)); + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = FLAGS_key_size; + plain_table_options.bloom_bits_per_key = bloom_bits_per_key; + plain_table_options.hash_table_ratio = 0.75; + options.table_factory = std::shared_ptr<TableFactory>( + NewPlainTableFactory(plain_table_options)); } else { BlockBasedTableOptions block_based_options; if (FLAGS_use_hash_search) { diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index 939a176d7..fdab81502 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -61,7 +61,18 @@ class 
PlainTableDBTest { // Return the current option configuration. Options CurrentOptions() { Options options; - options.table_factory.reset(NewPlainTableFactory(0, 2, 0.8, 3, 0, kPrefix)); + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 0; + plain_table_options.bloom_bits_per_key = 2; + plain_table_options.hash_table_ratio = 0.8; + plain_table_options.index_sparseness = 3; + plain_table_options.huge_page_tlb_size = 0; + plain_table_options.encoding_type = kPrefix; + plain_table_options.full_scan_mode = false; + + options.table_factory.reset(NewPlainTableFactory(plain_table_options)); + options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 3)); options.prefix_extractor.reset(NewFixedPrefixTransform(8)); options.allow_mmap_reads = true; @@ -212,16 +223,11 @@ extern const uint64_t kPlainTableMagicNumber; class TestPlainTableFactory : public PlainTableFactory { public: explicit TestPlainTableFactory(bool* expect_bloom_not_match, - uint32_t user_key_len, int bloom_bits_per_key, - double hash_table_ratio, - size_t index_sparseness, - size_t huge_page_tlb_size, - EncodingType encoding_type) - : PlainTableFactory(user_key_len, bloom_bits_per_key, hash_table_ratio, - index_sparseness, huge_page_tlb_size, encoding_type), - bloom_bits_per_key_(bloom_bits_per_key), - hash_table_ratio_(hash_table_ratio), - index_sparseness_(index_sparseness), + const PlainTableOptions& options) + : PlainTableFactory(options), + bloom_bits_per_key_(options.bloom_bits_per_key), + hash_table_ratio_(options.hash_table_ratio), + index_sparseness_(options.index_sparseness), expect_bloom_not_match_(expect_bloom_not_match) {} Status NewTableReader(const Options& options, const EnvOptions& soptions, @@ -268,11 +274,30 @@ TEST(PlainTableDBTest, Flush) { // Test index interval for the same prefix to be 1, 2 and 4 if (total_order) { options.prefix_extractor.reset(); - options.table_factory.reset(NewPlainTableFactory( - 0, bloom_bits, 0, 2, huge_page_tlb_size, 
encoding_type)); + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 0; + plain_table_options.bloom_bits_per_key = bloom_bits; + plain_table_options.hash_table_ratio = 0; + plain_table_options.index_sparseness = 2; + plain_table_options.huge_page_tlb_size = huge_page_tlb_size; + plain_table_options.encoding_type = encoding_type; + plain_table_options.full_scan_mode = false; + + options.table_factory.reset( + NewPlainTableFactory(plain_table_options)); } else { - options.table_factory.reset(NewPlainTableFactory( - 0, bloom_bits, 0.75, 16, huge_page_tlb_size, encoding_type)); + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 0; + plain_table_options.bloom_bits_per_key = bloom_bits; + plain_table_options.hash_table_ratio = 0.75; + plain_table_options.index_sparseness = 16; + plain_table_options.huge_page_tlb_size = huge_page_tlb_size; + plain_table_options.encoding_type = encoding_type; + plain_table_options.full_scan_mode = false; + + options.table_factory.reset( + NewPlainTableFactory(plain_table_options)); } DestroyAndReopen(&options); @@ -315,13 +340,27 @@ TEST(PlainTableDBTest, Flush2) { // Test index interval for the same prefix to be 1, 2 and 4 if (total_order) { options.prefix_extractor = nullptr; + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 0; + plain_table_options.bloom_bits_per_key = bloom_bits; + plain_table_options.hash_table_ratio = 0; + plain_table_options.index_sparseness = 2; + plain_table_options.huge_page_tlb_size = huge_page_tlb_size; + plain_table_options.encoding_type = encoding_type; + options.table_factory.reset(new TestPlainTableFactory( - &expect_bloom_not_match, 0, bloom_bits, 0, 2, huge_page_tlb_size, - encoding_type)); + &expect_bloom_not_match, plain_table_options)); } else { + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 0; + plain_table_options.bloom_bits_per_key = bloom_bits; + plain_table_options.hash_table_ratio = 
0.75; + plain_table_options.index_sparseness = 16; + plain_table_options.huge_page_tlb_size = huge_page_tlb_size; + plain_table_options.encoding_type = encoding_type; + options.table_factory.reset(new TestPlainTableFactory( - &expect_bloom_not_match, 0, bloom_bits, 0.75, 16, - huge_page_tlb_size, encoding_type)); + &expect_bloom_not_match, plain_table_options)); } DestroyAndReopen(&options); ASSERT_OK(Put("0000000000000bar", "b")); @@ -380,13 +419,28 @@ TEST(PlainTableDBTest, Iterator) { // Test index interval for the same prefix to be 1, 2 and 4 if (total_order) { options.prefix_extractor = nullptr; + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 16; + plain_table_options.bloom_bits_per_key = bloom_bits; + plain_table_options.hash_table_ratio = 0; + plain_table_options.index_sparseness = 2; + plain_table_options.huge_page_tlb_size = huge_page_tlb_size; + plain_table_options.encoding_type = encoding_type; + options.table_factory.reset(new TestPlainTableFactory( - &expect_bloom_not_match, 16, bloom_bits, 0, 2, huge_page_tlb_size, - encoding_type)); + &expect_bloom_not_match, plain_table_options)); } else { + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 16; + plain_table_options.bloom_bits_per_key = bloom_bits; + plain_table_options.hash_table_ratio = 0.75; + plain_table_options.index_sparseness = 16; + plain_table_options.huge_page_tlb_size = huge_page_tlb_size; + plain_table_options.encoding_type = encoding_type; + options.table_factory.reset(new TestPlainTableFactory( - &expect_bloom_not_match, 16, bloom_bits, 0.75, 16, - huge_page_tlb_size, encoding_type)); + &expect_bloom_not_match, plain_table_options)); } DestroyAndReopen(&options); @@ -485,7 +539,13 @@ std::string MakeLongKey(size_t length, char c) { TEST(PlainTableDBTest, IteratorLargeKeys) { Options options = CurrentOptions(); - options.table_factory.reset(NewPlainTableFactory(0, 0, 0)); + + PlainTableOptions plain_table_options; + 
plain_table_options.user_key_len = 0; + plain_table_options.bloom_bits_per_key = 0; + plain_table_options.hash_table_ratio = 0; + + options.table_factory.reset(NewPlainTableFactory(plain_table_options)); options.create_if_missing = true; options.prefix_extractor.reset(); DestroyAndReopen(&options); @@ -529,7 +589,16 @@ std::string MakeLongKeyWithPrefix(size_t length, char c) { TEST(PlainTableDBTest, IteratorLargeKeysWithPrefix) { Options options = CurrentOptions(); - options.table_factory.reset(NewPlainTableFactory(16, 0, 0.8, 3, 0, kPrefix)); + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 16; + plain_table_options.bloom_bits_per_key = 0; + plain_table_options.hash_table_ratio = 0.8; + plain_table_options.index_sparseness = 3; + plain_table_options.huge_page_tlb_size = 0; + plain_table_options.encoding_type = kPrefix; + + options.table_factory.reset(NewPlainTableFactory(plain_table_options)); options.create_if_missing = true; DestroyAndReopen(&options); @@ -665,8 +734,16 @@ TEST(PlainTableDBTest, HashBucketConflict) { options.create_if_missing = true; // Set only one bucket to force bucket conflict. // Test index interval for the same prefix to be 1, 2 and 4 - options.table_factory.reset( - NewPlainTableFactory(16, 0, 0, 2 ^ i, huge_page_tlb_size)); + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 16; + plain_table_options.bloom_bits_per_key = 0; + plain_table_options.hash_table_ratio = 0; + plain_table_options.index_sparseness = 2 ^ i; + plain_table_options.huge_page_tlb_size = huge_page_tlb_size; + + options.table_factory.reset(NewPlainTableFactory(plain_table_options)); + DestroyAndReopen(&options); ASSERT_OK(Put("5000000000000fo0", "v1")); ASSERT_OK(Put("5000000000000fo1", "v2")); @@ -752,8 +829,15 @@ TEST(PlainTableDBTest, HashBucketConflictReverseSuffixComparator) { options.comparator = &comp; // Set only one bucket to force bucket conflict. 
// Test index interval for the same prefix to be 1, 2 and 4 - options.table_factory.reset( - NewPlainTableFactory(16, 0, 0, 2 ^ i, huge_page_tlb_size)); + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 16; + plain_table_options.bloom_bits_per_key = 0; + plain_table_options.hash_table_ratio = 0; + plain_table_options.index_sparseness = 2 ^ i; + plain_table_options.huge_page_tlb_size = huge_page_tlb_size; + + options.table_factory.reset(NewPlainTableFactory(plain_table_options)); DestroyAndReopen(&options); ASSERT_OK(Put("5000000000000fo0", "v1")); ASSERT_OK(Put("5000000000000fo1", "v2")); @@ -833,7 +917,13 @@ TEST(PlainTableDBTest, NonExistingKeyToNonEmptyBucket) { options.create_if_missing = true; // Set only one bucket to force bucket conflict. // Test index interval for the same prefix to be 1, 2 and 4 - options.table_factory.reset(NewPlainTableFactory(16, 0, 0, 5)); + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 16; + plain_table_options.bloom_bits_per_key = 0; + plain_table_options.hash_table_ratio = 0; + plain_table_options.index_sparseness = 5; + + options.table_factory.reset(NewPlainTableFactory(plain_table_options)); DestroyAndReopen(&options); ASSERT_OK(Put("5000000000000fo0", "v1")); ASSERT_OK(Put("5000000000000fo1", "v2")); diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index dd4e8d110..f4055d4ba 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -213,7 +213,14 @@ TEST(TablePropertiesTest, CustomizedTablePropertiesCollector) { Options options; options.table_properties_collector_factories.emplace_back( new RegularKeysStartWithAFactory()); - options.table_factory = std::make_shared<PlainTableFactory>(8, 8, 0); + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 8; + plain_table_options.bloom_bits_per_key = 8; + plain_table_options.hash_table_ratio = 0; + + options.table_factory = + 
std::make_shared<PlainTableFactory>(plain_table_options); test::PlainInternalKeyComparator ikc(options.comparator); TestCustomizedTablePropertiesCollector(kPlainTableMagicNumber, true, options, ikc); @@ -299,11 +306,15 @@ TEST(TablePropertiesTest, InternalKeyPropertiesCollector) { true /* not sanitize */, std::make_shared<BlockBasedTableFactory>() ); + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 8; + plain_table_options.bloom_bits_per_key = 8; + plain_table_options.hash_table_ratio = 0; + TestInternalKeyPropertiesCollector( - kPlainTableMagicNumber, - false /* not sanitize */, - std::make_shared<PlainTableFactory>(8, 8, 0) - ); + kPlainTableMagicNumber, false /* not sanitize */, + std::make_shared<PlainTableFactory>(plain_table_options)); } } // namespace rocksdb diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 7f75e865b..f2dc0063d 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -121,31 +121,39 @@ struct PlainTablePropertyNames { static const std::string kEncodingType; }; -// -- Plain Table with prefix-only seek -// For this factory, you need to set Options.prefix_extrator properly to make it -// work. Look-up will starts with prefix hash lookup for key prefix. Inside the -// hash bucket found, a binary search is executed for hash conflicts. Finally, -// a linear search is used. +const uint32_t kPlainTableVariableLength = 0; + +struct PlainTableOptions { // @user_key_len: plain table has optimization for fix-sized keys, which can be // specified via user_key_len. Alternatively, you can pass // `kPlainTableVariableLength` if your keys have variable // lengths. +uint32_t user_key_len = kPlainTableVariableLength; + // @bloom_bits_per_key: the number of bits used for bloom filer per prefix. You // may disable it by passing a zero. +int bloom_bits_per_key = 10; + // @hash_table_ratio: the desired utilization of the hash table used for prefix // hashing. 
hash_table_ratio = number of prefixes / #buckets // in the hash table +double hash_table_ratio = 0.75; + // @index_sparseness: inside each prefix, need to build one index record for how // many keys for binary search inside each hash bucket. // For encoding type kPrefix, the value will be used when // writing to determine an interval to rewrite the full key. // It will also be used as a suggestion and satisfied when // possible. +size_t index_sparseness = 16; + // @huge_page_tlb_size: if <=0, allocate hash indexes and blooms from malloc. // Otherwise from huge page TLB. The user needs to reserve // huge pages for it to be allocated, like: // sysctl -w vm.nr_hugepages=20 // See linux doc Documentation/vm/hugetlbpage.txt +size_t huge_page_tlb_size = 0; + // @encoding_type: how to encode the keys. See enum EncodingType above for // the choices. The value will determine how to encode keys // when writing to a new SST file. This value will be stored @@ -153,13 +161,21 @@ struct PlainTablePropertyNames { // file, which makes it possible for users to choose different // encoding type when reopening a DB. Files with different // encoding types can co-exist in the same DB and can be read. +EncodingType encoding_type = kPlain; -const uint32_t kPlainTableVariableLength = 0; -extern TableFactory* NewPlainTableFactory( - uint32_t user_key_len = kPlainTableVariableLength, - int bloom_bits_per_prefix = 10, double hash_table_ratio = 0.75, - size_t index_sparseness = 16, size_t huge_page_tlb_size = 0, - EncodingType encoding_type = kPlain, bool full_scan_mode = false); +// @full_scan_mode: mode for reading the whole file record by record without +// using the index. + bool full_scan_mode = false; +}; + +// -- Plain Table with prefix-only seek +// For this factory, you need to set Options.prefix_extractor properly to make +// it work. Look-up starts with prefix hash lookup for key prefix. Inside the +// hash bucket found, a binary search is executed for hash conflicts. 
Finally, +// a linear search is used. + +extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options = + PlainTableOptions()); #endif // ROCKSDB_LITE diff --git a/table/plain_table_factory.cc b/table/plain_table_factory.cc index c91916ab5..fd070aad6 100644 --- a/table/plain_table_factory.cc +++ b/table/plain_table_factory.cc @@ -33,13 +33,8 @@ TableBuilder* PlainTableFactory::NewTableBuilder( index_sparseness_); } -extern TableFactory* NewPlainTableFactory( - uint32_t user_key_len, int bloom_bits_per_key, double hash_table_ratio, - size_t index_sparseness, size_t huge_page_tlb_size, - EncodingType encoding_type, bool full_scan_mode) { - return new PlainTableFactory( - user_key_len, bloom_bits_per_key, hash_table_ratio, index_sparseness, - huge_page_tlb_size, encoding_type, full_scan_mode); +extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options) { + return new PlainTableFactory(options); } const std::string PlainTablePropertyNames::kPrefixExtractorName = diff --git a/table/plain_table_factory.h b/table/plain_table_factory.h index 4b02a8e03..0c646a9e0 100644 --- a/table/plain_table_factory.h +++ b/table/plain_table_factory.h @@ -143,20 +143,15 @@ class PlainTableFactory : public TableFactory { // huge_page_tlb_size determines whether to allocate hash indexes from huge // page TLB and the page size if allocating from there. See comments of // Arena::AllocateAligned() for details. 
- explicit PlainTableFactory(uint32_t user_key_len = kPlainTableVariableLength, - int bloom_bits_per_key = 0, - double hash_table_ratio = 0.75, - size_t index_sparseness = 16, - size_t huge_page_tlb_size = 0, - EncodingType encoding_type = kPlain, - bool full_scan_mode = false) - : user_key_len_(user_key_len), - bloom_bits_per_key_(bloom_bits_per_key), - hash_table_ratio_(hash_table_ratio), - index_sparseness_(index_sparseness), - huge_page_tlb_size_(huge_page_tlb_size), - encoding_type_(encoding_type), - full_scan_mode_(full_scan_mode) {} + explicit PlainTableFactory(const PlainTableOptions& options = + PlainTableOptions()) + : user_key_len_(options.user_key_len), + bloom_bits_per_key_(options.bloom_bits_per_key), + hash_table_ratio_(options.hash_table_ratio), + index_sparseness_(options.index_sparseness), + huge_page_tlb_size_(options.huge_page_tlb_size), + encoding_type_(options.encoding_type), + full_scan_mode_(options.full_scan_mode) {} const char* Name() const override { return "PlainTable"; } Status NewTableReader(const Options& options, const EnvOptions& soptions, const InternalKeyComparator& internal_comparator, diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index 20cb87538..a2aec8a6f 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -445,9 +445,10 @@ Status PlainTableReader::PopulateIndex(TableProperties* props, size_t huge_page_tlb_size) { assert(props != nullptr); table_properties_.reset(props); + // options.prefix_extractor is requried for a hash-based look-up. + if ((options_.prefix_extractor.get() == nullptr) && (hash_table_ratio != 0)) { // options.prefix_extractor is requried for a hash-based look-up. 
- if (options_.prefix_extractor.get() == nullptr && hash_table_ratio != 0) { return Status::NotSupported( "PlainTable requires a prefix extractor enable prefix hash mode."); } diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index 9aa20648a..80c8eb2f8 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -261,8 +261,13 @@ int main(int argc, char** argv) { if (FLAGS_plain_table) { options.allow_mmap_reads = true; env_options.use_mmap_reads = true; - tf = new rocksdb::PlainTableFactory(16, (FLAGS_prefix_len == 16) ? 0 : 8, - 0.75); + + rocksdb::PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 16; + plain_table_options.bloom_bits_per_key = (FLAGS_prefix_len == 16) ? 0 : 8; + plain_table_options.hash_table_ratio = 0.75; + + tf = new rocksdb::PlainTableFactory(plain_table_options); options.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform( FLAGS_prefix_len)); } else { diff --git a/table/table_test.cc b/table/table_test.cc index 03a304434..335b33cc9 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -718,8 +718,16 @@ class Harness { only_support_prefix_seek_ = false; options_.prefix_extractor = nullptr; options_.allow_mmap_reads = true; - options_.table_factory.reset( - NewPlainTableFactory(kPlainTableVariableLength, 0, 0)); + + { + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = kPlainTableVariableLength; + plain_table_options.bloom_bits_per_key = 0; + plain_table_options.hash_table_ratio = 0; + + options_.table_factory.reset( + NewPlainTableFactory(plain_table_options)); + } constructor_ = new TableConstructor(options_.comparator, true); internal_comparator_.reset( new InternalKeyComparator(options_.comparator)); @@ -1493,7 +1501,12 @@ TEST(BlockBasedTableTest, BlockCacheLeak) { } TEST(PlainTableTest, BasicPlainTableProperties) { - PlainTableFactory factory(8, 8, 0); + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = 8; + 
plain_table_options.bloom_bits_per_key = 8; + plain_table_options.hash_table_ratio = 0; + + PlainTableFactory factory(plain_table_options); StringSink sink; Options options; InternalKeyComparator ikc(options.comparator); diff --git a/tools/sst_dump.cc b/tools/sst_dump.cc index 03d0c9dfa..da59bc923 100644 --- a/tools/sst_dump.cc +++ b/tools/sst_dump.cc @@ -157,8 +157,17 @@ Status SstFileReader::SetTableOptionsByMagicNumber( } else if (table_magic_number == kPlainTableMagicNumber || table_magic_number == kLegacyPlainTableMagicNumber) { options_.allow_mmap_reads = true; - options_.table_factory.reset(NewPlainTableFactory( - kPlainTableVariableLength, 0, 0, 1, 0, kPlain, true)); + + PlainTableOptions plain_table_options; + plain_table_options.user_key_len = kPlainTableVariableLength; + plain_table_options.bloom_bits_per_key = 0; + plain_table_options.hash_table_ratio = 0; + plain_table_options.index_sparseness = 1; + plain_table_options.huge_page_tlb_size = 0; + plain_table_options.encoding_type = kPlain; + plain_table_options.full_scan_mode = true; + + options_.table_factory.reset(NewPlainTableFactory(plain_table_options)); fprintf(stdout, "Sst file format: plain table\n"); } else { char error_msg_buffer[80];