diff --git a/HISTORY.md b/HISTORY.md index 8197d6fd9..07c8035e1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -30,6 +30,7 @@ * Introduce options.check_flush_compaction_key_order with default value to be true. With this option, during flush and compaction, key order will be checked when writing to each SST file. If the order is violated, the flush or compaction will fail. * Added is_full_compaction to CompactionJobStats, so that the information is available through the EventListener interface. * Add more stats for MultiGet in Histogram to get number of data blocks, index blocks, filter blocks and sst files read from file system per level. +* SST files have a new table property called db_host_id, which is set to the hostname by default. A new option in DBOptions, db_host_id, allows the property value to be overridden with a user specified string, or disable it completely by making the option string empty. ## 6.13 (09/12/2020) ### Bug fixes diff --git a/db/db_dynamic_level_test.cc b/db/db_dynamic_level_test.cc index 6ecf727c9..9f2013c02 100644 --- a/db/db_dynamic_level_test.cc +++ b/db/db_dynamic_level_test.cc @@ -141,6 +141,7 @@ TEST_F(DBTestDynamicLevel, DynamicLevelMaxBytesBase2) { options.max_background_compactions = 2; options.num_levels = 5; options.max_compaction_bytes = 0; // Force not expanding in compactions + options.db_host_id = ""; // Setting this messes up the file size calculation BlockBasedTableOptions table_options; table_options.block_size = 1024; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); diff --git a/db/db_table_properties_test.cc b/db/db_table_properties_test.cc index ca085cc5d..2a0a3c11b 100644 --- a/db/db_table_properties_test.cc +++ b/db/db_table_properties_test.cc @@ -11,9 +11,11 @@ #include #include "db/db_test_util.h" +#include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/db.h" #include "rocksdb/utilities/table_properties_collectors.h" +#include "table/format.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/random.h" @@ -274,6 +276,55 @@ TEST_F(DBTablePropertiesTest, GetDbIdentifiersProperty) { } } +class DBTableHostnamePropertyTest + : public DBTestBase, + public ::testing::WithParamInterface> { + public: + DBTableHostnamePropertyTest() + : DBTestBase("/db_table_hostname_property_test", + /*env_do_fsync=*/false) {} +}; + +TEST_P(DBTableHostnamePropertyTest, DbHostLocationProperty) { + option_config_ = std::get<0>(GetParam()); + Options opts = CurrentOptions(); + std::string expected_host_id = std::get<1>(GetParam()); + ; + if (expected_host_id == kHostnameForDbHostId) { + ASSERT_OK(env_->GetHostNameString(&expected_host_id)); + } else { + opts.db_host_id = expected_host_id; + } + CreateAndReopenWithCF({"goku"}, opts); + + for (uint32_t cf = 0; cf < 2; ++cf) { + Put(cf, "key", "val"); + Put(cf, "foo", "bar"); + Flush(cf); + + TablePropertiesCollection fname_to_props; + ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[cf], &fname_to_props)); + ASSERT_EQ(1U, fname_to_props.size()); + + ASSERT_EQ(fname_to_props.begin()->second->db_host_id, expected_host_id); + } +} + +INSTANTIATE_TEST_CASE_P( + DBTableHostnamePropertyTest, DBTableHostnamePropertyTest, + ::testing::Values( + // OptionConfig, override db_host_location + std::make_tuple(DBTestBase::OptionConfig::kDefault, + kHostnameForDbHostId), + std::make_tuple(DBTestBase::OptionConfig::kDefault, "foobar"), + std::make_tuple(DBTestBase::OptionConfig::kDefault, ""), + std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix, + kHostnameForDbHostId), + std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix, + "foobar"), + std::make_tuple(DBTestBase::OptionConfig::kPlainTableFirstBytePrefix, + ""))); + class DeletionTriggeredCompactionTestListener : public EventListener { public: void OnCompactionBegin(DB* , const CompactionJobInfo& ci) override { diff --git a/env/env.cc b/env/env.cc index 829fcefb1..9dcdd3421 100644 --- a/env/env.cc +++ b/env/env.cc @@ -140,6 +140,16 @@ Status Env::GetChildrenFileAttributes(const std::string& dir, return Status::OK(); } +Status Env::GetHostNameString(std::string* result) { + std::array hostname_buf; + Status s = GetHostName(hostname_buf.data(), hostname_buf.size()); + if (s.ok()) { + hostname_buf[hostname_buf.size() - 1] = '\0'; + result->assign(hostname_buf.data()); + } + return s; +} + SequentialFile::~SequentialFile() { } diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 955d591c3..13e528d2a 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -452,9 +452,15 @@ class Env { // Sleep/delay the thread for the prescribed number of micro-seconds. virtual void SleepForMicroseconds(int micros) = 0; - // Get the current host name. + // Get the current host name as a null terminated string iff the string + // length is < len. The hostname should otherwise be truncated to len. virtual Status GetHostName(char* name, uint64_t len) = 0; + // Get the current hostname from the given env as a std::string in result. + // The result may be truncated if the hostname is too + // long + virtual Status GetHostNameString(std::string* result); + // Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC). // Only overwrites *unix_time on success. virtual Status GetCurrentTime(int64_t* unix_time) = 0; @@ -576,6 +582,9 @@ class Env { // Pointer to the underlying FileSystem implementation std::shared_ptr file_system_; + + private: + static const size_t kMaxHostNameLen = 256; }; // The factory function to construct a ThreadStatusUpdater. Any Env diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index a85fdf4a6..1e4658cff 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -348,6 +348,8 @@ struct DbPath { DbPath(const std::string& p, uint64_t t) : path(p), target_size(t) {} }; +static const std::string kHostnameForDbHostId = "__hostname__"; + struct DBOptions { // The function recovers options to the option as in version 4.6. DBOptions* OldDefaults(int rocksdb_major_version = 4, @@ -1180,6 +1182,19 @@ struct DBOptions { // // Default: false bool allow_data_in_errors = false; + + // A string identifying the machine hosting the DB. This + // will be written as a property in every SST file written by the DB (or + // by offline writers such as SstFileWriter and RepairDB). It can be useful + // for troubleshooting in memory corruption caused by a failing host when + // writing a file, by tracing back to the writing host. These corruptions + // may not be caught by the checksum since they happen before checksumming. + // If left as default, the table writer will substitute it with the actual + // hostname when writing the SST file. If set to an empty stirng, the + // property will not be written to the SST file. + // + // Default: hostname + std::string db_host_id = kHostnameForDbHostId; }; // Options to control the behavior of a database (passed to DB::Open) diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index ba3eca752..c6810aa55 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -32,6 +32,7 @@ typedef std::map UserCollectedProperties; struct TablePropertiesNames { static const std::string kDbId; static const std::string kDbSessionId; + static const std::string kDbHostId; static const std::string kDataSize; static const std::string kIndexSize; static const std::string kIndexPartitions; @@ -206,6 +207,12 @@ struct TableProperties { // empty string. std::string db_session_id; + // Location of the machine hosting the DB instance + // db_host_id identifies the location of the host in some form + // (hostname by default, but can also be any string of the user's choosing). + // It can potentially change whenever the DB is opened + std::string db_host_id; + // Name of the column family with which this SST file is associated. // If column family is unknown, `column_family_name` will be an empty string. std::string column_family_name; diff --git a/options/cf_options.cc b/options/cf_options.cc index fb56f2388..caa4aa29c 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -853,7 +853,8 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options, compaction_thread_limiter(cf_options.compaction_thread_limiter), file_checksum_gen_factory(db_options.file_checksum_gen_factory.get()), sst_partitioner_factory(cf_options.sst_partitioner_factory), - allow_data_in_errors(db_options.allow_data_in_errors) {} + allow_data_in_errors(db_options.allow_data_in_errors), + db_host_id(db_options.db_host_id) {} // Multiple two operands. If they overflow, return op1. uint64_t MultiplyCheckOverflow(uint64_t op1, double op2) { diff --git a/options/cf_options.h b/options/cf_options.h index ca086b5c8..b74fbf1a7 100644 --- a/options/cf_options.h +++ b/options/cf_options.h @@ -124,6 +124,8 @@ struct ImmutableCFOptions { std::shared_ptr sst_partitioner_factory; bool allow_data_in_errors; + + std::string db_host_id; }; struct MutableCFOptions { diff --git a/options/db_options.cc b/options/db_options.cc index c61154a4b..3733d448c 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -389,6 +389,9 @@ static std::unordered_map {offsetof(struct ImmutableDBOptions, bgerror_resume_retry_interval), OptionType::kUInt64T, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"db_host_id", + {offsetof(struct ImmutableDBOptions, db_host_id), OptionType::kString, + OptionVerificationType::kNormal, OptionTypeFlags::kCompareNever}}, // The following properties were handled as special cases in ParseOption // This means that the properties could be read from the options file // but never written to the file or compared to each other. @@ -576,7 +579,8 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options) best_efforts_recovery(options.best_efforts_recovery), max_bgerror_resume_count(options.max_bgerror_resume_count), bgerror_resume_retry_interval(options.bgerror_resume_retry_interval), - allow_data_in_errors(options.allow_data_in_errors) { + allow_data_in_errors(options.allow_data_in_errors), + db_host_id(options.db_host_id) { } void ImmutableDBOptions::Dump(Logger* log) const { @@ -738,6 +742,8 @@ void ImmutableDBOptions::Dump(Logger* log) const { bgerror_resume_retry_interval); ROCKS_LOG_HEADER(log, " Options.allow_data_in_errors: %d", allow_data_in_errors); + ROCKS_LOG_HEADER(log, " Options.db_host_id: %s", + db_host_id.c_str()); } MutableDBOptions::MutableDBOptions() diff --git a/options/db_options.h b/options/db_options.h index 631d38ac0..42a58e256 100644 --- a/options/db_options.h +++ b/options/db_options.h @@ -92,6 +92,7 @@ struct ImmutableDBOptions { int max_bgerror_resume_count; uint64_t bgerror_resume_retry_interval; bool allow_data_in_errors; + std::string db_host_id; }; struct MutableDBOptions { diff --git a/options/options_helper.cc b/options/options_helper.cc index 6bec5e82b..a30006d1c 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -166,6 +166,7 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, immutable_db_options.max_bgerror_resume_count; options.bgerror_resume_retry_interval = immutable_db_options.bgerror_resume_retry_interval; + options.db_host_id = immutable_db_options.db_host_id; return options; } diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 3c960f765..21bb76152 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -226,6 +226,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { {offsetof(struct DBOptions, wal_filter), sizeof(const WalFilter*)}, {offsetof(struct DBOptions, file_checksum_gen_factory), sizeof(std::shared_ptr)}, + {offsetof(struct DBOptions, db_host_id), sizeof(std::string)}, }; char* options_ptr = new char[sizeof(DBOptions)]; @@ -332,7 +333,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { "write_dbid_to_manifest=false;" "best_efforts_recovery=false;" "max_bgerror_resume_count=2;" - "bgerror_resume_retry_interval=1000000", + "bgerror_resume_retry_interval=1000000" + "db_host_id=hostname", new_options)); ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions), diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 28bfbb7b2..f793447ec 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -331,6 +331,7 @@ struct BlockBasedTableBuilder::Rep { // DB IDs const std::string db_id; const std::string db_session_id; + std::string db_host_id; std::vector> table_properties_collectors; @@ -449,7 +450,8 @@ struct BlockBasedTableBuilder::Rep { target_file_size(_target_file_size), file_creation_time(_file_creation_time), db_id(_db_id), - db_session_id(_db_session_id) { + db_session_id(_db_session_id), + db_host_id(ioptions.db_host_id) { for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) { compression_ctxs[i].reset(new CompressionContext(compression_type)); } @@ -491,6 +493,10 @@ struct BlockBasedTableBuilder::Rep { verify_ctxs[i].reset(new UncompressionContext(compression_type)); } } + + if (!ReifyDbHostIdProperty(ioptions.env, &db_host_id).ok()) { + ROCKS_LOG_INFO(ioptions.info_log, "db_host_id property will not be set"); + } } Rep(const Rep&) = delete; @@ -1412,6 +1418,7 @@ void BlockBasedTableBuilder::WritePropertiesBlock( rep_->props.file_creation_time = rep_->file_creation_time; rep_->props.db_id = rep_->db_id; rep_->props.db_session_id = rep_->db_session_id; + rep_->props.db_host_id = rep_->db_host_id; // Add basic properties property_block_builder.AddTableProperty(rep_->props); diff --git a/table/format.cc b/table/format.cc index 23dc0bbc1..41235a1b5 100644 --- a/table/format.cc +++ b/table/format.cc @@ -19,6 +19,7 @@ #include "monitoring/perf_context_imp.h" #include "monitoring/statistics.h" #include "rocksdb/env.h" +#include "rocksdb/options.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_reader.h" #include "table/persistent_cache_helper.h" @@ -403,4 +404,18 @@ Status UncompressBlockContents(const UncompressionInfo& uncompression_info, ioptions, allocator); } +// Replace the contents of db_host_id with the actual hostname, if db_host_id +// matches the keyword kHostnameForDbHostId +Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id) { + assert(db_host_id); + if (*db_host_id == kHostnameForDbHostId) { + Status s = env->GetHostNameString(db_host_id); + if (!s.ok()) { + db_host_id->clear(); + } + return s; + } + + return Status::OK(); +} } // namespace ROCKSDB_NAMESPACE diff --git a/table/format.h b/table/format.h index e40a5ceae..d012502cc 100644 --- a/table/format.h +++ b/table/format.h @@ -331,6 +331,9 @@ extern Status UncompressBlockContentsForCompressionType( BlockContents* contents, uint32_t compress_format_version, const ImmutableCFOptions& ioptions, MemoryAllocator* allocator = nullptr); +// Replace db_host_id contents with the real hostname if necessary +extern Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id); + // Implementation details follow. Clients should ignore, // TODO(andrewkr): we should prefer one way of representing a null/uninitialized diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 6b781de1e..4c41b7f86 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -102,6 +102,9 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { if (!props.db_session_id.empty()) { Add(TablePropertiesNames::kDbSessionId, props.db_session_id); } + if (!props.db_host_id.empty()) { + Add(TablePropertiesNames::kDbHostId, props.db_host_id); + } if (!props.filter_policy_name.empty()) { Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name); @@ -322,6 +325,8 @@ Status ReadProperties(const ReadOptions& read_options, new_table_properties->db_id = raw_val.ToString(); } else if (key == TablePropertiesNames::kDbSessionId) { new_table_properties->db_session_id = raw_val.ToString(); + } else if (key == TablePropertiesNames::kDbHostId) { + new_table_properties->db_host_id = raw_val.ToString(); } else if (key == TablePropertiesNames::kFilterPolicy) { new_table_properties->filter_policy_name = raw_val.ToString(); } else if (key == TablePropertiesNames::kColumnFamilyName) { diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index df643cf7b..f1dc0f14c 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -100,6 +100,10 @@ PlainTableBuilder::PlainTableBuilder( properties_.column_family_name = column_family_name; properties_.db_id = db_id; properties_.db_session_id = db_session_id; + properties_.db_host_id = ioptions.db_host_id; + if (!ReifyDbHostIdProperty(ioptions_.env, &properties_.db_host_id).ok()) { + ROCKS_LOG_INFO(ioptions_.info_log, "db_host_id property will not be set"); + } properties_.prefix_extractor_name = moptions_.prefix_extractor != nullptr ? moptions_.prefix_extractor->Name() : "nullptr"; diff --git a/table/sst_file_reader_test.cc b/table/sst_file_reader_test.cc index 8a63b69bd..bcd1af294 100644 --- a/table/sst_file_reader_test.cc +++ b/table/sst_file_reader_test.cc @@ -90,6 +90,9 @@ class SstFileReaderTest : public testing::Test { if (check_global_seqno) { auto properties = reader.GetTableProperties(); ASSERT_TRUE(properties); + std::string hostname; + ASSERT_OK(env_->GetHostNameString(&hostname)); + ASSERT_EQ(properties->db_host_id, hostname); auto& user_properties = properties->user_collected_properties; ASSERT_TRUE( user_properties.count(ExternalSstFilePropertyNames::kGlobalSeqno)); diff --git a/table/table_properties.cc b/table/table_properties.cc index 622f3d45b..6af1f379f 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -196,6 +196,8 @@ void TableProperties::Add(const TableProperties& tp) { const std::string TablePropertiesNames::kDbId = "rocksdb.creating.db.identity"; const std::string TablePropertiesNames::kDbSessionId = "rocksdb.creating.session.identity"; +const std::string TablePropertiesNames::kDbHostId = + "rocksdb.creating.host.identity"; const std::string TablePropertiesNames::kDataSize = "rocksdb.data.size"; const std::string TablePropertiesNames::kIndexSize = diff --git a/table/table_test.cc b/table/table_test.cc index 2ff312490..dcfda22be 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -3529,6 +3529,7 @@ TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) { std::vector keys; stl_wrappers::KVMap kvmap; Options options; + options.db_host_id = ""; test::PlainInternalKeyComparator internal_comparator(options.comparator); options.compression = kNoCompression; BlockBasedTableOptions table_options;