diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 8d9a883b6..232e49270 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -1302,6 +1302,22 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) { #endif // ROCKSDB_LITE +class DBBlockCacheKeyTest + : public DBTestBase, + public testing::WithParamInterface> { + public: + DBBlockCacheKeyTest() + : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {} + + void SetUp() override { + use_compressed_cache_ = std::get<0>(GetParam()); + exclude_file_numbers_ = std::get<1>(GetParam()); + } + + bool use_compressed_cache_; + bool exclude_file_numbers_; +}; + // Disable LinkFile so that we can physically copy a DB using Checkpoint. // Disable file GetUniqueId to enable stable cache keys. class StableCacheKeyTestFS : public FaultInjectionTestFS { @@ -1319,121 +1335,198 @@ class StableCacheKeyTestFS : public FaultInjectionTestFS { } }; -TEST_F(DBBlockCacheTest, StableCacheKeys) { +TEST_P(DBBlockCacheKeyTest, StableCacheKeys) { std::shared_ptr test_fs{ new StableCacheKeyTestFS(env_->GetFileSystem())}; std::unique_ptr test_env{ new CompositeEnvWrapper(env_, test_fs)}; - for (bool compressed : {false, true}) { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.env = test_env.get(); + Options options = CurrentOptions(); + options.create_if_missing = true; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + options.env = test_env.get(); - BlockBasedTableOptions table_options; + BlockBasedTableOptions table_options; - std::function verify_stats; - if (compressed) { - if (!Snappy_Supported()) { - fprintf(stderr, "skipping compressed test, snappy unavailable\n"); - continue; - } - options.compression = CompressionType::kSnappyCompression; - table_options.no_block_cache = true; - table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false); - verify_stats = [&options] { - // One for ordinary SST file and one for external SST file - ASSERT_EQ( - 2, options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD)); - }; - } else { - table_options.cache_index_and_filter_blocks = true; - table_options.block_cache = NewLRUCache(1 << 25, 0, false); - verify_stats = [&options] { - ASSERT_EQ(2, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); - ASSERT_EQ(2, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); - ASSERT_EQ(2, - options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); - }; + int key_count = 0; + uint64_t expected_stat = 0; + + std::function verify_stats; + if (use_compressed_cache_) { + if (!Snappy_Supported()) { + ROCKSDB_GTEST_SKIP("Compressed cache test requires snappy support"); + return; } + options.compression = CompressionType::kSnappyCompression; + table_options.no_block_cache = true; + table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false); + verify_stats = [&options, &expected_stat] { + // One for ordinary SST file and one for external SST file + ASSERT_EQ(expected_stat, + options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD)); + }; + } else { + table_options.cache_index_and_filter_blocks = true; + table_options.block_cache = NewLRUCache(1 << 25, 0, false); + verify_stats = [&options, &expected_stat] { + ASSERT_EQ(expected_stat, + options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD)); + ASSERT_EQ(expected_stat, + options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD)); + ASSERT_EQ(expected_stat, + options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD)); + }; + } - table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - DestroyAndReopen(options); + table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + CreateAndReopenWithCF({"koko"}, options); - // Ordinary SST file - ASSERT_OK(Put("key1", "abc")); - std::string something_compressible(500U, 'x'); - ASSERT_OK(Put("key1a", something_compressible)); - ASSERT_OK(Flush()); + if (exclude_file_numbers_) { + // Simulate something like old behavior without file numbers in properties. + // This is a "control" side of the test that also ensures safely degraded + // behavior on old files. + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) { + TableProperties* props = reinterpret_cast(arg); + props->orig_file_number = 0; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + } + + std::function perform_gets = [&key_count, &expected_stat, this]() { + if (exclude_file_numbers_) { + // No cache key reuse should happen, because we can't rely on current + // file number being stable + expected_stat += key_count; + } else { + // Cache keys should be stable + expected_stat = key_count; + } + for (int i = 0; i < key_count; ++i) { + ASSERT_EQ(Get(1, Key(i)), "abc"); + } + }; + + // Ordinary SST files with same session id + const std::string something_compressible(500U, 'x'); + for (int i = 0; i < 2; ++i) { + ASSERT_OK(Put(1, Key(key_count), "abc")); + ASSERT_OK(Put(1, Key(key_count) + "a", something_compressible)); + ASSERT_OK(Flush(1)); + ++key_count; + } #ifndef ROCKSDB_LITE - // External SST file - std::string external = dbname_ + "/external.sst"; - { - SstFileWriter sst_file_writer(EnvOptions(), options); - ASSERT_OK(sst_file_writer.Open(external)); - ASSERT_OK(sst_file_writer.Put("key2", "abc")); - ASSERT_OK(sst_file_writer.Put("key2a", something_compressible)); - ExternalSstFileInfo external_info; - ASSERT_OK(sst_file_writer.Finish(&external_info)); - IngestExternalFileOptions ingest_opts; - ASSERT_OK(db_->IngestExternalFile({external}, ingest_opts)); - } -#else - // Another ordinary SST file - ASSERT_OK(Put("key2", "abc")); - ASSERT_OK(Put("key2a", something_compressible)); - ASSERT_OK(Flush()); + // Save an export of those ordinary SST files for later + std::string export_files_dir = dbname_ + "/exported"; + ExportImportFilesMetaData* metadata_ptr_ = nullptr; + Checkpoint* checkpoint; + ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); + ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir, + &metadata_ptr_)); + ASSERT_NE(metadata_ptr_, nullptr); + delete checkpoint; + checkpoint = nullptr; + + // External SST files with same session id + SstFileWriter sst_file_writer(EnvOptions(), options); + std::vector external; + for (int i = 0; i < 2; ++i) { + std::string f = dbname_ + "/external" + ToString(i) + ".sst"; + external.push_back(f); + ASSERT_OK(sst_file_writer.Open(f)); + ASSERT_OK(sst_file_writer.Put(Key(key_count), "abc")); + ASSERT_OK( + sst_file_writer.Put(Key(key_count) + "a", something_compressible)); + ++key_count; + ExternalSstFileInfo external_info; + ASSERT_OK(sst_file_writer.Finish(&external_info)); + IngestExternalFileOptions ingest_opts; + ASSERT_OK(db_->IngestExternalFile(handles_[1], {f}, ingest_opts)); + } + + if (exclude_file_numbers_) { + // FIXME(peterd): figure out where these extra two ADDs are coming from + options.statistics->recordTick(BLOCK_CACHE_INDEX_ADD, + uint64_t{0} - uint64_t{2}); + options.statistics->recordTick(BLOCK_CACHE_FILTER_ADD, + uint64_t{0} - uint64_t{2}); + options.statistics->recordTick(BLOCK_CACHE_COMPRESSED_ADD, + uint64_t{0} - uint64_t{2}); + } #endif - ASSERT_EQ(Get("key1"), std::string("abc")); - ASSERT_EQ(Get("key2"), std::string("abc")); - verify_stats(); + perform_gets(); + verify_stats(); - // Make sure we can cache hit after re-open - Reopen(options); + // Make sure we can cache hit after re-open + ReopenWithColumnFamilies({"default", "koko"}, options); - ASSERT_EQ(Get("key1"), std::string("abc")); - ASSERT_EQ(Get("key2"), std::string("abc")); - verify_stats(); + perform_gets(); + verify_stats(); - // Make sure we can cache hit even on a full copy of the DB. Using - // StableCacheKeyTestFS, Checkpoint will resort to full copy not hard link. - // (Checkpoint not available in LITE mode to test this.) + // Make sure we can cache hit even on a full copy of the DB. Using + // StableCacheKeyTestFS, Checkpoint will resort to full copy not hard link. + // (Checkpoint not available in LITE mode to test this.) #ifndef ROCKSDB_LITE - auto db_copy_name = dbname_ + "-copy"; - Checkpoint* checkpoint; - ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); - ASSERT_OK(checkpoint->CreateCheckpoint(db_copy_name)); - delete checkpoint; + auto db_copy_name = dbname_ + "-copy"; + ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); + ASSERT_OK(checkpoint->CreateCheckpoint(db_copy_name)); + delete checkpoint; - Close(); - Destroy(options); - SaveAndRestore save_dbname(&dbname_, db_copy_name); - Reopen(options); + Close(); + Destroy(options); - ASSERT_EQ(Get("key1"), std::string("abc")); - ASSERT_EQ(Get("key2"), std::string("abc")); - verify_stats(); + // Switch to the DB copy + SaveAndRestore save_dbname(&dbname_, db_copy_name); + ReopenWithColumnFamilies({"default", "koko"}, options); - // And ensure that re-ingesting the same external file into a different DB - // uses same cache keys - DestroyAndReopen(options); + perform_gets(); + verify_stats(); - IngestExternalFileOptions ingest_opts; - ASSERT_OK(db_->IngestExternalFile({external}, ingest_opts)); + // And ensure that re-importing + ingesting the same files into a + // different DB uses same cache keys + DestroyAndReopen(options); - ASSERT_EQ(Get("key2"), std::string("abc")); - verify_stats(); + ColumnFamilyHandle* cfh = nullptr; + ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", + ImportColumnFamilyOptions(), + *metadata_ptr_, &cfh)); + ASSERT_NE(cfh, nullptr); + delete cfh; + cfh = nullptr; + delete metadata_ptr_; + metadata_ptr_ = nullptr; + + DestroyDB(export_files_dir, options); + + ReopenWithColumnFamilies({"default", "yoyo"}, options); + + IngestExternalFileOptions ingest_opts; + ASSERT_OK(db_->IngestExternalFile(handles_[1], {external}, ingest_opts)); + + if (exclude_file_numbers_) { + // FIXME(peterd): figure out where these extra two ADDs are coming from + options.statistics->recordTick(BLOCK_CACHE_INDEX_ADD, + uint64_t{0} - uint64_t{2}); + options.statistics->recordTick(BLOCK_CACHE_FILTER_ADD, + uint64_t{0} - uint64_t{2}); + } + + perform_gets(); + verify_stats(); #endif // !ROCKSDB_LITE - Close(); - Destroy(options); - } + Close(); + Destroy(options); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } +INSTANTIATE_TEST_CASE_P(DBBlockCacheKeyTest, DBBlockCacheKeyTest, + ::testing::Combine(::testing::Bool(), + ::testing::Bool())); + class DBBlockCachePinningTest : public DBTestBase, public testing::WithParamInterface< diff --git a/db/event_helpers.cc b/db/event_helpers.cc index 25b239019..0073302a0 100644 --- a/db/event_helpers.cc +++ b/db/event_helpers.cc @@ -143,7 +143,8 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished( << "fast_compression_estimated_data_size" << table_properties.fast_compression_estimated_data_size << "db_id" << table_properties.db_id << "db_session_id" - << table_properties.db_session_id; + << table_properties.db_session_id << "orig_file_number" + << table_properties.orig_file_number; // user collected properties for (const auto& prop : table_properties.readable_properties) { diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index d3e9eeace..2e605ee8d 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -33,6 +33,7 @@ struct TablePropertiesNames { static const std::string kDbId; static const std::string kDbSessionId; static const std::string kDbHostId; + static const std::string kOriginalFileNumber; static const std::string kDataSize; static const std::string kIndexSize; static const std::string kIndexPartitions; @@ -153,6 +154,9 @@ class TablePropertiesCollectorFactory { // table. struct TableProperties { public: + // the file number at creation time, or 0 for unknown. When known, + // combining with db_session_id must uniquely identify an SST file. + uint64_t orig_file_number = 0; // the total size of all data blocks. uint64_t data_size = 0; // the size of index block. diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 2b20cdc46..d2b30443e 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -324,16 +324,6 @@ struct BlockBasedTableBuilder::Rep { std::string compressed_output; std::unique_ptr flush_block_policy; - uint32_t column_family_id; - std::string column_family_name; - uint64_t creation_time = 0; - uint64_t oldest_key_time = 0; - uint64_t file_creation_time = 0; - - // DB IDs - const std::string db_id; - const std::string db_session_id; - std::string db_host_id; std::vector> table_properties_collectors; @@ -444,14 +434,6 @@ struct BlockBasedTableBuilder::Rep { flush_block_policy( table_options.flush_block_policy_factory->NewFlushBlockPolicy( table_options, data_block)), - column_family_id(tbo.column_family_id), - column_family_name(tbo.column_family_name), - creation_time(tbo.creation_time), - oldest_key_time(tbo.oldest_key_time), - file_creation_time(tbo.file_creation_time), - db_id(tbo.db_id), - db_session_id(tbo.db_session_id), - db_host_id(ioptions.db_host_id), status_ok(true), io_status_ok(true) { if (tbo.target_file_size == 0) { @@ -514,7 +496,7 @@ struct BlockBasedTableBuilder::Rep { assert(factory); table_properties_collectors.emplace_back( - factory->CreateIntTblPropCollector(column_family_id)); + factory->CreateIntTblPropCollector(tbo.column_family_id)); } table_properties_collectors.emplace_back( new BlockBasedTablePropertiesCollector( @@ -526,7 +508,17 @@ struct BlockBasedTableBuilder::Rep { } } - if (!ReifyDbHostIdProperty(ioptions.env, &db_host_id).ok()) { + // These are only needed for populating table properties + props.column_family_id = tbo.column_family_id; + props.column_family_name = tbo.column_family_name; + props.creation_time = tbo.creation_time; + props.oldest_key_time = tbo.oldest_key_time; + props.file_creation_time = tbo.file_creation_time; + props.orig_file_number = tbo.cur_file_num; + props.db_id = tbo.db_id; + props.db_session_id = tbo.db_session_id; + props.db_host_id = ioptions.db_host_id; + if (!ReifyDbHostIdProperty(ioptions.env, &props.db_host_id).ok()) { ROCKS_LOG_INFO(ioptions.logger, "db_host_id property will not be set"); } } @@ -1622,8 +1614,6 @@ void BlockBasedTableBuilder::WritePropertiesBlock( BlockHandle properties_block_handle; if (ok()) { PropertyBlockBuilder property_block_builder; - rep_->props.column_family_id = rep_->column_family_id; - rep_->props.column_family_name = rep_->column_family_name; rep_->props.filter_policy_name = rep_->table_options.filter_policy != nullptr ? rep_->table_options.filter_policy->Name() @@ -1668,9 +1658,6 @@ void BlockBasedTableBuilder::WritePropertiesBlock( !rep_->index_builder->seperator_is_key_plus_seq(); rep_->props.index_value_is_delta_encoded = rep_->use_delta_encoding_for_index_values; - rep_->props.creation_time = rep_->creation_time; - rep_->props.oldest_key_time = rep_->oldest_key_time; - rep_->props.file_creation_time = rep_->file_creation_time; if (rep_->sampled_input_data_bytes > 0) { rep_->props.slow_compression_estimated_data_size = static_cast( static_cast(rep_->sampled_output_slow_data_bytes) / @@ -1692,9 +1679,6 @@ void BlockBasedTableBuilder::WritePropertiesBlock( rep_->compressible_input_data_bytes + rep_->uncompressible_input_data_bytes; } - rep_->props.db_id = rep_->db_id; - rep_->props.db_session_id = rep_->db_session_id; - rep_->props.db_host_id = rep_->db_host_id; // Add basic properties property_block_builder.AddTableProperty(rep_->props); diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index ee6ccf830..e51ad291a 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -630,28 +630,20 @@ Status BlockBasedTable::Open( // With properties loaded, we can set up portable/stable cache keys if // necessary info is available - std::string db_session_id = cur_db_session_id; - uint64_t file_num = cur_file_num; - if (rep->table_properties && !rep->table_properties->db_session_id.empty()) { - const auto& uprops = rep->table_properties->user_collected_properties; - auto version_iter = uprops.find(ExternalSstFilePropertyNames::kVersion); - if (version_iter == uprops.end()) { - // Normal (non-external) SST file - can only use embedded db_session_id - // with current file number (which should be original file number) - if (file_num > 0) { - db_session_id = rep->table_properties->db_session_id; - } - } else { - // External (ingested) SST file - should not use current file number - // (which is changed from original), so that same file ingested into - // different DBs can share block cache entries. Although they can modify - // the embedded global_seqno, that information is not currently cached - // under these portable/stable keys. - // Note: For now, each external SST file gets its own unique session id, - // so we can use a fixed file number under than session id. - db_session_id = rep->table_properties->db_session_id; - file_num = 1; - } + std::string db_session_id; + uint64_t file_num; + if (rep->table_properties && !rep->table_properties->db_session_id.empty() && + rep->table_properties->orig_file_number > 0) { + // We must have both properties to get a stable unique id because + // CreateColumnFamilyWithImport or IngestExternalFiles can change the + // file numbers on a file. + db_session_id = rep->table_properties->db_session_id; + file_num = rep->table_properties->orig_file_number; + } else { + // We have to use transient (but unique) cache keys based on current + // identifiers. + db_session_id = cur_db_session_id; + file_num = cur_file_num; } SetupCacheKeyPrefix(rep, db_session_id, file_num); diff --git a/table/cuckoo/cuckoo_table_builder.cc b/table/cuckoo/cuckoo_table_builder.cc index 15f214035..2a4e2536a 100644 --- a/table/cuckoo/cuckoo_table_builder.cc +++ b/table/cuckoo/cuckoo_table_builder.cc @@ -54,7 +54,8 @@ CuckooTableBuilder::CuckooTableBuilder( bool use_module_hash, bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t), uint32_t column_family_id, const std::string& column_family_name, - const std::string& db_id, const std::string& db_session_id) + const std::string& db_id, const std::string& db_session_id, + uint64_t file_number) : num_hash_func_(2), file_(file), max_hash_table_ratio_(max_hash_table_ratio), @@ -82,6 +83,7 @@ CuckooTableBuilder::CuckooTableBuilder( properties_.column_family_name = column_family_name; properties_.db_id = db_id; properties_.db_session_id = db_session_id; + properties_.orig_file_number = file_number; status_.PermitUncheckedError(); io_status_.PermitUncheckedError(); } diff --git a/table/cuckoo/cuckoo_table_builder.h b/table/cuckoo/cuckoo_table_builder.h index 8e8026487..a72d5183a 100644 --- a/table/cuckoo/cuckoo_table_builder.h +++ b/table/cuckoo/cuckoo_table_builder.h @@ -29,7 +29,8 @@ class CuckooTableBuilder: public TableBuilder { bool use_module_hash, bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t), uint32_t column_family_id, const std::string& column_family_name, - const std::string& db_id = "", const std::string& db_session_id = ""); + const std::string& db_id = "", const std::string& db_session_id = "", + uint64_t file_number = 0); // No copying allowed CuckooTableBuilder(const CuckooTableBuilder&) = delete; void operator=(const CuckooTableBuilder&) = delete; diff --git a/table/cuckoo/cuckoo_table_factory.cc b/table/cuckoo/cuckoo_table_factory.cc index 4fd014e97..1253c92dd 100644 --- a/table/cuckoo/cuckoo_table_factory.cc +++ b/table/cuckoo/cuckoo_table_factory.cc @@ -41,7 +41,7 @@ TableBuilder* CuckooTableFactory::NewTableBuilder( table_options_.identity_as_first_hash, nullptr /* get_slice_hash */, table_builder_options.column_family_id, table_builder_options.column_family_name, table_builder_options.db_id, - table_builder_options.db_session_id); + table_builder_options.db_session_id, table_builder_options.cur_file_num); } std::string CuckooTableFactory::GetPrintableOptions() const { diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index fff1397c8..ae42b2948 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -71,6 +71,7 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { TEST_SYNC_POINT_CALLBACK("PropertyBlockBuilder::AddTableProperty:Start", const_cast(&props)); + Add(TablePropertiesNames::kOriginalFileNumber, props.orig_file_number); Add(TablePropertiesNames::kRawKeySize, props.raw_key_size); Add(TablePropertiesNames::kRawValueSize, props.raw_value_size); Add(TablePropertiesNames::kDataSize, props.data_size); @@ -255,6 +256,8 @@ Status ReadProperties(const ReadOptions& read_options, auto new_table_properties = new TableProperties(); // All pre-defined properties of type uint64_t std::unordered_map predefined_uint64_properties = { + {TablePropertiesNames::kOriginalFileNumber, + &new_table_properties->orig_file_number}, {TablePropertiesNames::kDataSize, &new_table_properties->data_size}, {TablePropertiesNames::kIndexSize, &new_table_properties->index_size}, {TablePropertiesNames::kIndexPartitions, diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index 5754f7c29..61a9cec8b 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -64,7 +64,7 @@ PlainTableBuilder::PlainTableBuilder( uint32_t bloom_bits_per_key, const std::string& column_family_name, uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio, bool store_index_in_file, const std::string& db_id, - const std::string& db_session_id) + const std::string& db_session_id, uint64_t file_number) : ioptions_(ioptions), moptions_(moptions), bloom_block_(num_probes), @@ -103,6 +103,7 @@ PlainTableBuilder::PlainTableBuilder( if (!ReifyDbHostIdProperty(ioptions_.env, &properties_.db_host_id).ok()) { ROCKS_LOG_INFO(ioptions_.logger, "db_host_id property will not be set"); } + properties_.orig_file_number = file_number; properties_.prefix_extractor_name = moptions_.prefix_extractor != nullptr ? moptions_.prefix_extractor->Name() : "nullptr"; diff --git a/table/plain/plain_table_builder.h b/table/plain/plain_table_builder.h index 07ed7e5dc..860554f14 100644 --- a/table/plain/plain_table_builder.h +++ b/table/plain/plain_table_builder.h @@ -45,7 +45,7 @@ class PlainTableBuilder: public TableBuilder { const std::string& column_family_name, uint32_t num_probes = 6, size_t huge_page_tlb_size = 0, double hash_table_ratio = 0, bool store_index_in_file = false, const std::string& db_id = "", - const std::string& db_session_id = ""); + const std::string& db_session_id = "", uint64_t file_number = 0); // No copying allowed PlainTableBuilder(const PlainTableBuilder&) = delete; void operator=(const PlainTableBuilder&) = delete; diff --git a/table/plain/plain_table_factory.cc b/table/plain/plain_table_factory.cc index dd345492d..90e062951 100644 --- a/table/plain/plain_table_factory.cc +++ b/table/plain/plain_table_factory.cc @@ -85,7 +85,7 @@ TableBuilder* PlainTableFactory::NewTableBuilder( table_builder_options.column_family_name, 6, table_options_.huge_page_tlb_size, table_options_.hash_table_ratio, table_options_.store_index_in_file, table_builder_options.db_id, - table_builder_options.db_session_id); + table_builder_options.db_session_id, table_builder_options.cur_file_num); } std::string PlainTableFactory::GetPrintableOptions() const { diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index 0ce8f6931..fa367e69b 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -30,7 +30,8 @@ const size_t kFadviseTrigger = 1024 * 1024; // 1MB struct SstFileWriter::Rep { Rep(const EnvOptions& _env_options, const Options& options, Env::IOPriority _io_priority, const Comparator* _user_comparator, - ColumnFamilyHandle* _cfh, bool _invalidate_page_cache, bool _skip_filters) + ColumnFamilyHandle* _cfh, bool _invalidate_page_cache, bool _skip_filters, + std::string _db_session_id) : env_options(_env_options), ioptions(options), mutable_cf_options(options), @@ -38,8 +39,8 @@ struct SstFileWriter::Rep { internal_comparator(_user_comparator), cfh(_cfh), invalidate_page_cache(_invalidate_page_cache), - last_fadvise_size(0), - skip_filters(_skip_filters) {} + skip_filters(_skip_filters), + db_session_id(_db_session_id) {} std::unique_ptr file_writer; std::unique_ptr builder; @@ -57,8 +58,11 @@ struct SstFileWriter::Rep { bool invalidate_page_cache; // The size of the file during the last time we called Fadvise to remove // cached pages from page cache. - uint64_t last_fadvise_size; + uint64_t last_fadvise_size = 0; bool skip_filters; + std::string db_session_id; + uint64_t next_file_number = 1; + Status Add(const Slice& user_key, const Slice& value, const ValueType value_type) { if (!builder) { @@ -170,7 +174,14 @@ SstFileWriter::SstFileWriter(const EnvOptions& env_options, bool invalidate_page_cache, Env::IOPriority io_priority, bool skip_filters) : rep_(new Rep(env_options, options, io_priority, user_comparator, - column_family, invalidate_page_cache, skip_filters)) { + column_family, invalidate_page_cache, skip_filters, + DBImpl::GenerateDbSessionId(options.env))) { + // SstFileWriter is used to create sst files that can be added to database + // later. Therefore, no real db_id and db_session_id are associated with it. + // Here we mimic the way db_session_id behaves by getting a db_session_id + // for each SstFileWriter, and (later below) assign unique file numbers + // in the table properties. The db_id is set to be "SST Writer" for clarity. + rep_->file_info.file_size = 0; } @@ -241,22 +252,19 @@ Status SstFileWriter::Open(const std::string& file_path) { r->column_family_name = ""; cf_id = TablePropertiesCollectorFactory::Context::kUnknownColumnFamily; } - // SstFileWriter is used to create sst files that can be added to database - // later. Therefore, no real db_id and db_session_id are associated with it. - // Here we mimic the way db_session_id behaves by resetting the db_session_id - // every time SstFileWriter is used, and in this case db_id is set to be "SST - // Writer". - std::string db_session_id = DBImpl::GenerateDbSessionId(r->ioptions.env); - if (!db_session_id.empty() && db_session_id.back() == '\n') { - db_session_id.pop_back(); - } TableBuilderOptions table_builder_options( r->ioptions, r->mutable_cf_options, r->internal_comparator, &int_tbl_prop_collector_factories, compression_type, compression_opts, cf_id, r->column_family_name, unknown_level, false /* is_bottommost */, TableFileCreationReason::kMisc, 0 /* creation_time */, 0 /* oldest_key_time */, 0 /* file_creation_time */, - "SST Writer" /* db_id */, db_session_id, 0 /* target_file_size */, 0); + "SST Writer" /* db_id */, r->db_session_id, 0 /* target_file_size */, + r->next_file_number); + // External SST files used to each get a unique session id. Now for + // slightly better uniqueness probability in constructing cache keys, we + // assign fake file numbers to each file (into table properties) and keep + // the same session id for the life of the SstFileWriter. + r->next_file_number++; // XXX: when we can remove skip_filters from the SstFileWriter public API // we can remove it from TableBuilderOptions. table_builder_options.skip_filters = r->skip_filters; diff --git a/table/table_properties.cc b/table/table_properties.cc index 373c763d6..d0aa45026 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -179,6 +179,9 @@ std::string TableProperties::ToString( AppendProperty(result, "DB identity", db_id, prop_delim, kv_delim); AppendProperty(result, "DB session identity", db_session_id, prop_delim, kv_delim); + AppendProperty(result, "DB host id", db_host_id, prop_delim, kv_delim); + AppendProperty(result, "original file number", orig_file_number, prop_delim, + kv_delim); return result; } @@ -233,6 +236,8 @@ const std::string TablePropertiesNames::kDbSessionId = "rocksdb.creating.session.identity"; const std::string TablePropertiesNames::kDbHostId = "rocksdb.creating.host.identity"; +const std::string TablePropertiesNames::kOriginalFileNumber = + "rocksdb.original.file.number"; const std::string TablePropertiesNames::kDataSize = "rocksdb.data.size"; const std::string TablePropertiesNames::kIndexSize =