From 0920bf4e684cde9747470faf5adae872d176ba21 Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Fri, 1 Mar 2019 15:41:55 -0800 Subject: [PATCH] Revert "Remove PlainTable's feature store_index_in_file (#4914)" (#5034) Summary: This reverts commit ee1818081ff4ca2a49a48cb4ca5b97665b8dcddf. We are not ready to deprecate this feature. revert it for now. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5034 Differential Revision: D14287246 Pulled By: siying fbshipit-source-id: e4beafdeaee1c94364fdaa6ba198218d158339f7 --- HISTORY.md | 1 - db/plain_table_db_test.cc | 78 +++++++++-- include/rocksdb/table.h | 3 +- java/rocksjni/table.cc | 4 +- .../java/org/rocksdb/PlainTableConfig.java | 24 ++-- .../org/rocksdb/PlainTableConfigTest.java | 8 ++ options/options_test.cc | 1 + table/plain_table_builder.cc | 75 ++++++++++- table/plain_table_builder.h | 16 ++- table/plain_table_factory.cc | 18 ++- table/plain_table_factory.h | 3 +- table/plain_table_reader.cc | 124 ++++++++++++++---- 12 files changed, 302 insertions(+), 53 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 1fe0335e1..e65a603b7 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -31,7 +31,6 @@ * With "ldb ----try_load_options", when wal_dir specified by the option file doesn't exist, ignore it. * Change time resolution in FileOperationInfo. * Deleting Blob files also go through SStFileManager. -* Remove PlainTable's store_index_in_file feature. When opening an existing DB with index in SST files, the index and bloom filter will still be rebuild while SST files are opened, in the same way as there is no index in the file. * Remove CuckooHash memtable. * The counter stat `number.block.not_compressed` now also counts blocks not compressed due to poor compression ratio. * Remove ttl option from `CompactionOptionsFIFO`. The option has been deprecated and ttl in `ColumnFamilyOptions` is used instead. 
diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index 6c9057164..2dd0cff0b 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -134,6 +134,7 @@ class PlainTableDBTest : public testing::Test, plain_table_options.huge_page_tlb_size = 0; plain_table_options.encoding_type = kPrefix; plain_table_options.full_scan_mode = false; + plain_table_options.store_index_in_file = false; options.table_factory.reset(NewPlainTableFactory(plain_table_options)); options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true)); @@ -271,7 +272,8 @@ class TestPlainTableReader : public PlainTableReader { std::unique_ptr&& file, const ImmutableCFOptions& ioptions, const SliceTransform* prefix_extractor, - bool* expect_bloom_not_match, uint32_t column_family_id, + bool* expect_bloom_not_match, bool store_index_in_file, + uint32_t column_family_id, const std::string& column_family_name) : PlainTableReader(ioptions, std::move(file), env_options, icomparator, encoding_type, file_size, table_properties, @@ -288,6 +290,17 @@ class TestPlainTableReader : public PlainTableReader { TableProperties* props = const_cast(table_properties); EXPECT_EQ(column_family_id, static_cast(props->column_family_id)); EXPECT_EQ(column_family_name, props->column_family_name); + if (store_index_in_file) { + auto bloom_version_ptr = props->user_collected_properties.find( + PlainTablePropertyNames::kBloomVersion); + EXPECT_TRUE(bloom_version_ptr != props->user_collected_properties.end()); + EXPECT_EQ(bloom_version_ptr->second, std::string("1")); + if (ioptions.bloom_locality > 0) { + auto num_blocks_ptr = props->user_collected_properties.find( + PlainTablePropertyNames::kNumBloomBlocks); + EXPECT_TRUE(num_blocks_ptr != props->user_collected_properties.end()); + } + } } ~TestPlainTableReader() override {} @@ -316,6 +329,7 @@ class TestPlainTableFactory : public PlainTableFactory { bloom_bits_per_key_(options.bloom_bits_per_key), 
hash_table_ratio_(options.hash_table_ratio), index_sparseness_(options.index_sparseness), + store_index_in_file_(options.store_index_in_file), expect_bloom_not_match_(expect_bloom_not_match), column_family_id_(column_family_id), column_family_name_(std::move(column_family_name)) {} @@ -332,6 +346,22 @@ class TestPlainTableFactory : public PlainTableFactory { true /* compression_type_missing */); EXPECT_TRUE(s.ok()); + if (store_index_in_file_) { + BlockHandle bloom_block_handle; + s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber, + table_reader_options.ioptions, + BloomBlockBuilder::kBloomBlock, &bloom_block_handle, + /* compression_type_missing */ true); + EXPECT_TRUE(s.ok()); + + BlockHandle index_block_handle; + s = FindMetaBlock(file.get(), file_size, kPlainTableMagicNumber, + table_reader_options.ioptions, + PlainTableIndexBuilder::kPlainTableIndexBlock, + &index_block_handle, /* compression_type_missing */ true); + EXPECT_TRUE(s.ok()); + } + auto& user_props = props->user_collected_properties; auto encoding_type_prop = user_props.find(PlainTablePropertyNames::kEncodingType); @@ -345,7 +375,7 @@ class TestPlainTableFactory : public PlainTableFactory { bloom_bits_per_key_, hash_table_ratio_, index_sparseness_, props, std::move(file), table_reader_options.ioptions, table_reader_options.prefix_extractor, expect_bloom_not_match_, - column_family_id_, column_family_name_)); + store_index_in_file_, column_family_id_, column_family_name_)); *table = std::move(new_reader); return s; @@ -355,6 +385,7 @@ class TestPlainTableFactory : public PlainTableFactory { int bloom_bits_per_key_; double hash_table_ratio_; size_t index_sparseness_; + bool store_index_in_file_; bool* expect_bloom_not_match_; const uint32_t column_family_id_; const std::string column_family_name_; @@ -364,8 +395,10 @@ TEST_P(PlainTableDBTest, Flush) { for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; huge_page_tlb_size += 2 * 1024 * 1024) { for (EncodingType 
encoding_type : {kPlain, kPrefix}) { - for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { - for (int total_order = 0; total_order <= 1; total_order++) { + for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { + for (int total_order = 0; total_order <= 1; total_order++) { + for (int store_index_in_file = 0; store_index_in_file <= 1; + ++store_index_in_file) { Options options = CurrentOptions(); options.create_if_missing = true; // Set only one bucket to force bucket conflict. @@ -381,6 +414,7 @@ TEST_P(PlainTableDBTest, Flush) { plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.encoding_type = encoding_type; plain_table_options.full_scan_mode = false; + plain_table_options.store_index_in_file = store_index_in_file; options.table_factory.reset( NewPlainTableFactory(plain_table_options)); @@ -393,6 +427,7 @@ TEST_P(PlainTableDBTest, Flush) { plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.encoding_type = encoding_type; plain_table_options.full_scan_mode = false; + plain_table_options.store_index_in_file = store_index_in_file; options.table_factory.reset( NewPlainTableFactory(plain_table_options)); @@ -418,15 +453,22 @@ TEST_P(PlainTableDBTest, Flush) { auto row = ptc.begin(); auto tp = row->second; - ASSERT_EQ(total_order ? "4" : "12", - (tp->user_collected_properties) - .at("plain_table_hash_table_size")); - ASSERT_EQ( - "0", - (tp->user_collected_properties).at("plain_table_sub_index_size")); + if (!store_index_in_file) { + ASSERT_EQ(total_order ? 
"4" : "12", + (tp->user_collected_properties) + .at("plain_table_hash_table_size")); + ASSERT_EQ("0", (tp->user_collected_properties) + .at("plain_table_sub_index_size")); + } else { + ASSERT_EQ("0", (tp->user_collected_properties) + .at("plain_table_hash_table_size")); + ASSERT_EQ("0", (tp->user_collected_properties) + .at("plain_table_sub_index_size")); + } ASSERT_EQ("v3", Get("1000000000000foo")); ASSERT_EQ("v2", Get("0000000000000bar")); } + } } } } @@ -436,11 +478,19 @@ TEST_P(PlainTableDBTest, Flush2) { for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024; huge_page_tlb_size += 2 * 1024 * 1024) { for (EncodingType encoding_type : {kPlain, kPrefix}) { - for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { - for (int total_order = 0; total_order <= 1; total_order++) { + for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) { + for (int total_order = 0; total_order <= 1; total_order++) { + for (int store_index_in_file = 0; store_index_in_file <= 1; + ++store_index_in_file) { if (encoding_type == kPrefix && total_order) { continue; } + if (!bloom_bits && store_index_in_file) { + continue; + } + if (total_order && store_index_in_file) { + continue; + } bool expect_bloom_not_match = false; Options options = CurrentOptions(); options.create_if_missing = true; @@ -459,6 +509,7 @@ TEST_P(PlainTableDBTest, Flush2) { plain_table_options.bloom_bits_per_key = bloom_bits; plain_table_options.huge_page_tlb_size = huge_page_tlb_size; plain_table_options.encoding_type = encoding_type; + plain_table_options.store_index_in_file = store_index_in_file; options.table_factory.reset(new TestPlainTableFactory( &expect_bloom_not_match, plain_table_options, 0 /* column_family_id */, kDefaultColumnFamilyName)); @@ -497,8 +548,9 @@ TEST_P(PlainTableDBTest, Flush2) { } expect_bloom_not_match = false; } - } } + } + } } } } diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 7e64c341b..c873271b5 100644 --- a/include/rocksdb/table.h 
+++ b/include/rocksdb/table.h @@ -351,11 +351,10 @@ struct PlainTableOptions { // using the index. bool full_scan_mode = false; - // THIS FEATURE IS REMOVED. // @store_index_in_file: compute plain table index and bloom filter during // file building and store it in file. When reading // file, index will be mmaped instead of recomputation. - // bool store_index_in_file = false; + bool store_index_in_file = false; }; // -- Plain Table with prefix-only seek diff --git a/java/rocksjni/table.cc b/java/rocksjni/table.cc index 5e6db28fc..1ccc550ab 100644 --- a/java/rocksjni/table.cc +++ b/java/rocksjni/table.cc @@ -21,7 +21,8 @@ jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( JNIEnv * /*env*/, jobject /*jobj*/, jint jkey_size, jint jbloom_bits_per_key, jdouble jhash_table_ratio, jint jindex_sparseness, - jint jhuge_page_tlb_size, jbyte jencoding_type, jboolean jfull_scan_mode) { + jint jhuge_page_tlb_size, jbyte jencoding_type, jboolean jfull_scan_mode, + jboolean jstore_index_in_file) { rocksdb::PlainTableOptions options = rocksdb::PlainTableOptions(); options.user_key_len = jkey_size; options.bloom_bits_per_key = jbloom_bits_per_key; @@ -30,6 +31,7 @@ jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( options.huge_page_tlb_size = jhuge_page_tlb_size; options.encoding_type = static_cast(jencoding_type); options.full_scan_mode = jfull_scan_mode; + options.store_index_in_file = jstore_index_in_file; return reinterpret_cast(rocksdb::NewPlainTableFactory(options)); } diff --git a/java/src/main/java/org/rocksdb/PlainTableConfig.java b/java/src/main/java/org/rocksdb/PlainTableConfig.java index 7ebfaf1b9..c09998167 100644 --- a/java/src/main/java/org/rocksdb/PlainTableConfig.java +++ b/java/src/main/java/org/rocksdb/PlainTableConfig.java @@ -21,6 +21,8 @@ public class PlainTableConfig extends TableFormatConfig { public static final EncodingType DEFAULT_ENCODING_TYPE = EncodingType.kPlain; public static final boolean DEFAULT_FULL_SCAN_MODE = false; + 
public static final boolean DEFAULT_STORE_INDEX_IN_FILE + = false; public PlainTableConfig() { keySize_ = VARIABLE_LENGTH; @@ -30,6 +32,7 @@ public class PlainTableConfig extends TableFormatConfig { hugePageTlbSize_ = DEFAULT_HUGE_TLB_SIZE; encodingType_ = DEFAULT_ENCODING_TYPE; fullScanMode_ = DEFAULT_FULL_SCAN_MODE; + storeIndexInFile_ = DEFAULT_STORE_INDEX_IN_FILE; } /** @@ -208,10 +211,9 @@ public class PlainTableConfig extends TableFormatConfig { * @param storeIndexInFile value indicating if index shall * be stored in a file * @return the reference to the current config. - * @deprecated */ - @Deprecated public PlainTableConfig setStoreIndexInFile(boolean storeIndexInFile) { + this.storeIndexInFile_ = storeIndexInFile; return this; } @@ -220,20 +222,23 @@ public class PlainTableConfig extends TableFormatConfig { * in a file. * * @return currently set value for store index in file. - * @deprecated */ - @Deprecated public boolean storeIndexInFile() { - return false; + return storeIndexInFile_; } @Override protected long newTableFactoryHandle() { - return newTableFactoryHandle(keySize_, bloomBitsPerKey_, hashTableRatio_, indexSparseness_, - hugePageTlbSize_, encodingType_.getValue(), fullScanMode_); + return newTableFactoryHandle(keySize_, bloomBitsPerKey_, + hashTableRatio_, indexSparseness_, hugePageTlbSize_, + encodingType_.getValue(), fullScanMode_, + storeIndexInFile_); } - private native long newTableFactoryHandle(int keySize, int bloomBitsPerKey, double hashTableRatio, - int indexSparseness, int hugePageTlbSize, byte encodingType, boolean fullScanMode); + private native long newTableFactoryHandle( + int keySize, int bloomBitsPerKey, + double hashTableRatio, int indexSparseness, + int hugePageTlbSize, byte encodingType, + boolean fullScanMode, boolean storeIndexInFile); private int keySize_; private int bloomBitsPerKey_; @@ -242,4 +247,5 @@ public class PlainTableConfig extends TableFormatConfig { private int hugePageTlbSize_; private EncodingType 
encodingType_; private boolean fullScanMode_; + private boolean storeIndexInFile_; } diff --git a/java/src/test/java/org/rocksdb/PlainTableConfigTest.java b/java/src/test/java/org/rocksdb/PlainTableConfigTest.java index 3dafef4c7..dcb6cc39f 100644 --- a/java/src/test/java/org/rocksdb/PlainTableConfigTest.java +++ b/java/src/test/java/org/rocksdb/PlainTableConfigTest.java @@ -70,6 +70,14 @@ public class PlainTableConfigTest { plainTableConfig.setFullScanMode(true); assertThat(plainTableConfig.fullScanMode()).isTrue(); } + @Test + public void storeIndexInFile() { + PlainTableConfig plainTableConfig = new PlainTableConfig(); + plainTableConfig.setStoreIndexInFile(true); + assertThat(plainTableConfig.storeIndexInFile()). + isTrue(); + } + @Test public void plainTableConfig() { try(final Options opt = new Options()) { diff --git a/options/options_test.cc b/options/options_test.cc index f700d8d65..b7076c8bd 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -662,6 +662,7 @@ TEST_F(OptionsTest, GetPlainTableOptionsFromString) { ASSERT_EQ(new_opt.huge_page_tlb_size, 4); ASSERT_EQ(new_opt.encoding_type, EncodingType::kPrefix); ASSERT_TRUE(new_opt.full_scan_mode); + ASSERT_TRUE(new_opt.store_index_in_file); // unknown option ASSERT_NOK(GetPlainTableOptionsFromString(table_opt, diff --git a/table/plain_table_builder.cc b/table/plain_table_builder.cc index 1b1058b4c..453b6c768 100644 --- a/table/plain_table_builder.cc +++ b/table/plain_table_builder.cc @@ -20,6 +20,7 @@ #include "table/plain_table_factory.h" #include "db/dbformat.h" #include "table/block_builder.h" +#include "table/bloom_block.h" #include "table/plain_table_index.h" #include "table/format.h" #include "table/meta_blocks.h" @@ -61,17 +62,34 @@ PlainTableBuilder::PlainTableBuilder( int_tbl_prop_collector_factories, uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_len, EncodingType encoding_type, size_t index_sparseness, - const std::string& column_family_name) + uint32_t 
bloom_bits_per_key, const std::string& column_family_name, + uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio, + bool store_index_in_file) : ioptions_(ioptions), moptions_(moptions), + bloom_block_(num_probes), file_(file), + bloom_bits_per_key_(bloom_bits_per_key), + huge_page_tlb_size_(huge_page_tlb_size), encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(), index_sparseness), + store_index_in_file_(store_index_in_file), prefix_extractor_(moptions.prefix_extractor.get()) { + // Build index block and save it in the file if hash_table_ratio > 0 + if (store_index_in_file_) { + assert(hash_table_ratio > 0 || IsTotalOrderMode()); + index_builder_.reset(new PlainTableIndexBuilder( + &arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness, + hash_table_ratio, huge_page_tlb_size_)); + properties_.user_collected_properties + [PlainTablePropertyNames::kBloomVersion] = "1"; // For future use + } + properties_.fixed_key_len = user_key_len; // for plain table, we put all the data in a big chuck. 
properties_.num_data_blocks = 1; + // Fill it later if store_index_in_file_ == true properties_.index_size = 0; properties_.filter_size = 0; // To support roll-back to previous version, now still use version 0 for @@ -112,11 +130,26 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) { return; } + // Store key hash + if (store_index_in_file_) { + if (moptions_.prefix_extractor == nullptr) { + keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key)); + } else { + Slice prefix = + moptions_.prefix_extractor->Transform(internal_key.user_key); + keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix)); + } + } + // Write value assert(offset_ <= std::numeric_limits::max()); + auto prev_offset = static_cast(offset_); // Write out the key encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf, &meta_bytes_buf_size); + if (SaveIndexInFile()) { + index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset); + } // Write value length uint32_t value_size = static_cast(value.size()); @@ -162,6 +195,46 @@ Status PlainTableBuilder::Finish() { MetaIndexBuilder meta_index_builer; + if (store_index_in_file_ && (properties_.num_entries > 0)) { + assert(properties_.num_entries <= std::numeric_limits::max()); + Status s; + BlockHandle bloom_block_handle; + if (bloom_bits_per_key_ > 0) { + bloom_block_.SetTotalBits( + &arena_, + static_cast(properties_.num_entries) * bloom_bits_per_key_, + ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.info_log); + + PutVarint32(&properties_.user_collected_properties + [PlainTablePropertyNames::kNumBloomBlocks], + bloom_block_.GetNumBlocks()); + + bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_); + + Slice bloom_finish_result = bloom_block_.Finish(); + + properties_.filter_size = bloom_finish_result.size(); + s = WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle); + + if (!s.ok()) { + return s; + } + meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle); + } + 
BlockHandle index_block_handle; + Slice index_finish_result = index_builder_->Finish(); + + properties_.index_size = index_finish_result.size(); + s = WriteBlock(index_finish_result, file_, &offset_, &index_block_handle); + + if (!s.ok()) { + return s; + } + + meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock, + index_block_handle); + } + // Calculate bloom block size and index block size PropertyBlockBuilder property_block_builder; // -- Add basic properties diff --git a/table/plain_table_builder.h b/table/plain_table_builder.h index 5a0be80b6..ca0879a4e 100644 --- a/table/plain_table_builder.h +++ b/table/plain_table_builder.h @@ -12,6 +12,8 @@ #include "rocksdb/status.h" #include "rocksdb/table.h" #include "rocksdb/table_properties.h" +#include "table/bloom_block.h" +#include "table/plain_table_index.h" #include "table/plain_table_key_coding.h" #include "table/table_builder.h" @@ -35,7 +37,10 @@ class PlainTableBuilder: public TableBuilder { int_tbl_prop_collector_factories, uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_size, EncodingType encoding_type, - size_t index_sparseness, const std::string& column_family_name); + size_t index_sparseness, uint32_t bloom_bits_per_key, + const std::string& column_family_name, uint32_t num_probes = 6, + size_t huge_page_tlb_size = 0, double hash_table_ratio = 0, + bool store_index_in_file = false); // REQUIRES: Either Finish() or Abandon() has been called. 
~PlainTableBuilder(); @@ -69,6 +74,8 @@ class PlainTableBuilder: public TableBuilder { TableProperties GetTableProperties() const override { return properties_; } + bool SaveIndexInFile() const { return store_index_in_file_; } + private: Arena arena_; const ImmutableCFOptions& ioptions_; @@ -76,12 +83,19 @@ class PlainTableBuilder: public TableBuilder { std::vector> table_properties_collectors_; + BloomBlockBuilder bloom_block_; + std::unique_ptr index_builder_; + WritableFileWriter* file_; uint64_t offset_ = 0; + uint32_t bloom_bits_per_key_; + size_t huge_page_tlb_size_; Status status_; TableProperties properties_; PlainTableKeyEncoder encoder_; + bool store_index_in_file_; + std::vector keys_or_prefixes_hashes_; bool closed_ = false; // Either Finish() or Abandon() has been called. diff --git a/table/plain_table_factory.cc b/table/plain_table_factory.cc index a27f22edf..a6e59c142 100644 --- a/table/plain_table_factory.cc +++ b/table/plain_table_factory.cc @@ -42,8 +42,10 @@ TableBuilder* PlainTableFactory::NewTableBuilder( table_builder_options.ioptions, table_builder_options.moptions, table_builder_options.int_tbl_prop_collector_factories, column_family_id, file, table_options_.user_key_len, table_options_.encoding_type, - table_options_.index_sparseness, - table_builder_options.column_family_name); + table_options_.index_sparseness, table_options_.bloom_bits_per_key, + table_builder_options.column_family_name, 6, + table_options_.huge_page_tlb_size, table_options_.hash_table_ratio, + table_options_.store_index_in_file); } std::string PlainTableFactory::GetPrintableTableOptions() const { @@ -55,15 +57,27 @@ std::string PlainTableFactory::GetPrintableTableOptions() const { snprintf(buffer, kBufferSize, " user_key_len: %u\n", table_options_.user_key_len); ret.append(buffer); + snprintf(buffer, kBufferSize, " bloom_bits_per_key: %d\n", + table_options_.bloom_bits_per_key); + ret.append(buffer); + snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n", + 
table_options_.hash_table_ratio); + ret.append(buffer); snprintf(buffer, kBufferSize, " index_sparseness: %" ROCKSDB_PRIszt "\n", table_options_.index_sparseness); ret.append(buffer); + snprintf(buffer, kBufferSize, " huge_page_tlb_size: %" ROCKSDB_PRIszt "\n", + table_options_.huge_page_tlb_size); + ret.append(buffer); snprintf(buffer, kBufferSize, " encoding_type: %d\n", table_options_.encoding_type); ret.append(buffer); snprintf(buffer, kBufferSize, " full_scan_mode: %d\n", table_options_.full_scan_mode); ret.append(buffer); + snprintf(buffer, kBufferSize, " store_index_in_file: %d\n", + table_options_.store_index_in_file); + ret.append(buffer); return ret; } diff --git a/table/plain_table_factory.h b/table/plain_table_factory.h index 4d7e98ca5..157e3acda 100644 --- a/table/plain_table_factory.h +++ b/table/plain_table_factory.h @@ -204,7 +204,8 @@ static std::unordered_map plain_table_type_info = { {offsetof(struct PlainTableOptions, full_scan_mode), OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}}, {"store_index_in_file", - {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, false, 0}}}; + {offsetof(struct PlainTableOptions, store_index_in_file), + OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}}}; } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/table/plain_table_reader.cc b/table/plain_table_reader.cc index 62bc906fe..5085edf1e 100644 --- a/table/plain_table_reader.cc +++ b/table/plain_table_reader.cc @@ -294,8 +294,47 @@ Status PlainTableReader::PopulateIndex(TableProperties* props, assert(props != nullptr); table_properties_.reset(props); - // index_in_file and bloom_in_file features are deprecated. - // Even if they exist in file, ignore them and always reconstruct. 
+ BlockContents index_block_contents; + Status s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */, + file_size_, kPlainTableMagicNumber, ioptions_, + PlainTableIndexBuilder::kPlainTableIndexBlock, + &index_block_contents, + true /* compression_type_missing */); + + bool index_in_file = s.ok(); + + BlockContents bloom_block_contents; + bool bloom_in_file = false; + // We only need to read the bloom block if index block is in file. + if (index_in_file) { + s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */, + file_size_, kPlainTableMagicNumber, ioptions_, + BloomBlockBuilder::kBloomBlock, &bloom_block_contents, + true /* compression_type_missing */); + bloom_in_file = s.ok() && bloom_block_contents.data.size() > 0; + } + + Slice* bloom_block; + if (bloom_in_file) { + // If bloom_block_contents.allocation is not empty (which will be the case + // for non-mmap mode), it holds the allocated memory for the bloom block. + // It needs to be kept alive to keep `bloom_block` valid. + bloom_block_alloc_ = std::move(bloom_block_contents.allocation); + bloom_block = &bloom_block_contents.data; + } else { + bloom_block = nullptr; + } + + Slice* index_block; + if (index_in_file) { + // If index_block_contents.allocation is not empty (which will be the case + // for non-mmap mode), it holds the allocated memory for the index block. + // It needs to be kept alive to keep `index_block` valid. + index_block_alloc_ = std::move(index_block_contents.allocation); + index_block = &index_block_contents.data; + } else { + index_block = nullptr; + } if ((prefix_extractor_ == nullptr) && (hash_table_ratio != 0)) { // moptions.prefix_extractor is requried for a hash-based look-up. @@ -308,36 +347,77 @@ Status PlainTableReader::PopulateIndex(TableProperties* props, // offset) and append it to IndexRecordList, which is a data structure created // to store them. - // Allocate bloom filter here for total order mode. 
- if (IsTotalOrderMode()) { - uint32_t num_bloom_bits = - static_cast(table_properties_->num_entries) * - bloom_bits_per_key; - if (num_bloom_bits > 0) { - enable_bloom_ = true; - bloom_.SetTotalBits(&arena_, num_bloom_bits, ioptions_.bloom_locality, - huge_page_tlb_size, ioptions_.info_log); + if (!index_in_file) { + // Allocate bloom filter here for total order mode. + if (IsTotalOrderMode()) { + uint32_t num_bloom_bits = + static_cast(table_properties_->num_entries) * + bloom_bits_per_key; + if (num_bloom_bits > 0) { + enable_bloom_ = true; + bloom_.SetTotalBits(&arena_, num_bloom_bits, ioptions_.bloom_locality, + huge_page_tlb_size, ioptions_.info_log); + } } + } else if (bloom_in_file) { + enable_bloom_ = true; + auto num_blocks_property = props->user_collected_properties.find( + PlainTablePropertyNames::kNumBloomBlocks); + + uint32_t num_blocks = 0; + if (num_blocks_property != props->user_collected_properties.end()) { + Slice temp_slice(num_blocks_property->second); + if (!GetVarint32(&temp_slice, &num_blocks)) { + num_blocks = 0; + } + } + // cast away const qualifier, because bloom_ won't be changed + bloom_.SetRawData( + const_cast( + reinterpret_cast(bloom_block->data())), + static_cast(bloom_block->size()) * 8, num_blocks); + } else { + // Index in file but no bloom in file. Disable bloom filter in this case. 
+ enable_bloom_ = false; + bloom_bits_per_key = 0; } + PlainTableIndexBuilder index_builder(&arena_, ioptions_, prefix_extractor_, index_sparseness, hash_table_ratio, huge_page_tlb_size); std::vector prefix_hashes; - Status s = PopulateIndexRecordList(&index_builder, &prefix_hashes); - if (!s.ok()) { - return s; + if (!index_in_file) { + s = PopulateIndexRecordList(&index_builder, &prefix_hashes); + if (!s.ok()) { + return s; + } + } else { + s = index_.InitFromRawData(*index_block); + if (!s.ok()) { + return s; + } + } + + if (!index_in_file) { + // Calculated bloom filter size and allocate memory for + // bloom filter based on the number of prefixes, then fill it. + AllocateAndFillBloom(bloom_bits_per_key, index_.GetNumPrefixes(), + huge_page_tlb_size, &prefix_hashes); } - // Calculated bloom filter size and allocate memory for - // bloom filter based on the number of prefixes, then fill it. - AllocateAndFillBloom(bloom_bits_per_key, index_.GetNumPrefixes(), - huge_page_tlb_size, &prefix_hashes); // Fill two table properties. - props->user_collected_properties["plain_table_hash_table_size"] = - ToString(index_.GetIndexSize() * PlainTableIndex::kOffsetLen); - props->user_collected_properties["plain_table_sub_index_size"] = - ToString(index_.GetSubIndexSize()); + if (!index_in_file) { + props->user_collected_properties["plain_table_hash_table_size"] = + ToString(index_.GetIndexSize() * PlainTableIndex::kOffsetLen); + props->user_collected_properties["plain_table_sub_index_size"] = + ToString(index_.GetSubIndexSize()); + } else { + props->user_collected_properties["plain_table_hash_table_size"] = + ToString(0); + props->user_collected_properties["plain_table_sub_index_size"] = + ToString(0); + } return Status::OK(); }