diff --git a/HISTORY.md b/HISTORY.md index cb4f18e79..4d5861735 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -18,6 +18,7 @@ * Renamed CompactionFilter::Decision::kRemoveWithSingleDelete to kPurge since the latter sounds more general and hides the implementation details of how compaction iterator handles keys. * Added ability to specify functions for Prepare and Validate to OptionsTypeInfo. Added methods to OptionTypeInfo to set the functions via an API. These methods are intended for RocksDB plugin developers for configuration management. * Added a new immutable db options, enforce_single_del_contracts. If set to false (default is true), compaction will NOT fail due to a single delete followed by a delete for the same key. The purpose of this temporay option is to help existing use cases migrate. +* Changed `GetUniqueIdFromTableProperties` to return a 128-bit unique identifier, which will be the standard size now. The old functionality (192-bit) is available from `GetExtendedUniqueIdFromTableProperties`. Both functions are no longer "experimental" and are ready for production use. ### Bug Fixes * RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue. diff --git a/include/rocksdb/unique_id.h b/include/rocksdb/unique_id.h index 030b2a724..9c6e7bf2b 100644 --- a/include/rocksdb/unique_id.h +++ b/include/rocksdb/unique_id.h @@ -9,9 +9,7 @@ namespace ROCKSDB_NAMESPACE { -// EXPERIMENTAL: This API is subject to change -// -// Computes a stable, universally unique 192-bit (24 binary char) identifier +// Computes a stable, universally unique 128-bit (16 binary char) identifier // for an SST file from TableProperties. This is supported for table (SST) // files created with RocksDB 6.24 and later. NotSupported will be returned // for other cases. 
The first 16 bytes (128 bits) is of sufficient quality @@ -22,20 +20,33 @@ namespace ROCKSDB_NAMESPACE { // .c_str() on the result will often result in information loss and very // poor uniqueness probability. // -// More detail: the first 128 bits are *guaranteed* unique for SST files +// More detail: the value is *guaranteed* unique for SST files // generated in the same process (even different DBs, RocksDB >= 6.26), // and first 128 bits are guaranteed not "all zeros" (RocksDB >= 6.26) // so that the "all zeros" value can be used reliably for a null ID. -// Assuming one generates many SST files in the lifetime of each process, -// the probability of collision between processes is "better than -// random": if processes generate n SST files on average, we expect to -// generate roughly 2^64 * sqrt(n) files before first collision in the -// first 128 bits. See https://github.com/pdillinger/unique_id -// Using the full 192 bits, we expect to generate roughly 2^96 * sqrt(n) -// files before first collision. +// These IDs are more than sufficient for SST uniqueness within each of +// many DBs or hosts. For an extreme example assuming random IDs, consider +// 10^9 hosts each with 10^9 live SST files being replaced at 10^6/second. +// Such a service would need to run for 10 million years to see an ID +// collision among live SST files on any host. +// +// And assuming one generates many SST files in the lifetime of each process, +// the probability of ID collisions is much "better than random"; see +// https://github.com/pdillinger/unique_id Status GetUniqueIdFromTableProperties(const TableProperties &props, std::string *out_id); +// Computes a 192-bit (24 binary char) stable, universally unique ID +// with an extra 64 bits of uniqueness compared to the standard ID. 
It is only +// appropriate to use this ID instead of the 128-bit ID if ID collisions +// between files among any hosts in a vast fleet are a problem, such as a shared +// global namespace for SST file backups. Under this criterion, the extreme +// example above would expect a global file ID collision every 4 days with +// 128-bit IDs (using some worst-case assumptions about process lifetime). +// It's 10^17 years with 192-bit IDs. +Status GetExtendedUniqueIdFromTableProperties(const TableProperties &props, + std::string *out_id); + // EXPERIMENTAL: This API is subject to change // // Converts a binary string (unique id) to hexadecimal, with each 64 bits diff --git a/table/table_test.cc b/table/table_test.cc index 8a3f4f7d8..a01a34e6f 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1428,12 +1428,19 @@ TestIds GetUniqueId(TableProperties* tp, std::unordered_set* seen, tp->orig_file_number = file_number; TestIds t; { + std::string euid; + EXPECT_OK(GetExtendedUniqueIdFromTableProperties(*tp, &euid)); + EXPECT_EQ(euid.size(), 24U); + t.external_id[0] = DecodeFixed64(&euid[0]); + t.external_id[1] = DecodeFixed64(&euid[8]); + t.external_id[2] = DecodeFixed64(&euid[16]); + std::string uid; EXPECT_OK(GetUniqueIdFromTableProperties(*tp, &uid)); - EXPECT_EQ(uid.size(), 24U); - t.external_id[0] = DecodeFixed64(&uid[0]); - t.external_id[1] = DecodeFixed64(&uid[8]); - t.external_id[2] = DecodeFixed64(&uid[16]); + EXPECT_EQ(uid.size(), 16U); + EXPECT_EQ(uid, euid.substr(0, 16)); + EXPECT_EQ(t.external_id[0], DecodeFixed64(&uid[0])); + EXPECT_EQ(t.external_id[1], DecodeFixed64(&uid[8])); } // All these should be effectively random EXPECT_TRUE(seen->insert(t.external_id[0]).second); @@ -1443,6 +1450,7 @@ TestIds GetUniqueId(TableProperties* tp, std::unordered_set* seen, // Get internal with internal API EXPECT_OK(GetSstInternalUniqueId(db_id, db_session_id, file_number, &t.internal_id)); + EXPECT_NE(t.internal_id, kNullUniqueId64x3); // Verify relationship 
UniqueId64x3 tmp = t.internal_id; @@ -1450,6 +1458,21 @@ TestIds GetUniqueId(TableProperties* tp, std::unordered_set* seen, EXPECT_EQ(tmp, t.external_id); ExternalUniqueIdToInternal(&tmp); EXPECT_EQ(tmp, t.internal_id); + + // And 128-bit internal version + UniqueId64x2 tmp2{}; + EXPECT_OK(GetSstInternalUniqueId(db_id, db_session_id, file_number, &tmp2)); + EXPECT_NE(tmp2, kNullUniqueId64x2); + + EXPECT_EQ(tmp2[0], t.internal_id[0]); + EXPECT_EQ(tmp2[1], t.internal_id[1]); + InternalUniqueIdToExternal(&tmp2); + EXPECT_EQ(tmp2[0], t.external_id[0]); + EXPECT_EQ(tmp2[1], t.external_id[1]); + ExternalUniqueIdToInternal(&tmp2); + EXPECT_EQ(tmp2[0], t.internal_id[0]); + EXPECT_EQ(tmp2[1], t.internal_id[1]); + return t; } } // namespace @@ -1590,7 +1613,7 @@ TEST_F(TablePropertyTest, UniqueIdHumanStrings) { SetGoodTableProperties(&tp); std::string tmp; - EXPECT_OK(GetUniqueIdFromTableProperties(tp, &tmp)); + EXPECT_OK(GetExtendedUniqueIdFromTableProperties(tp, &tmp)); EXPECT_EQ(tmp, (std::string{{'\x64', '\x74', '\xdf', '\x65', '\x03', '\x23', '\xbd', '\xf0', '\xb4', '\x8e', '\x64', '\xf3', @@ -1599,6 +1622,9 @@ TEST_F(TablePropertyTest, UniqueIdHumanStrings) { EXPECT_EQ(UniqueIdToHumanString(tmp), "6474DF650323BDF0-B48E64F3039308CA-17284B32E7F7444B"); + EXPECT_OK(GetUniqueIdFromTableProperties(tp, &tmp)); + EXPECT_EQ(UniqueIdToHumanString(tmp), "6474DF650323BDF0-B48E64F3039308CA"); + // including zero padding tmp = std::string(24U, '\0'); tmp[15] = '\x12'; @@ -1632,16 +1658,22 @@ TEST_F(TablePropertyTest, UniqueIdsFailure) { SetGoodTableProperties(&tp); tp.db_id = ""; EXPECT_TRUE(GetUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); + EXPECT_TRUE( + GetExtendedUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); // Missing session id SetGoodTableProperties(&tp); tp.db_session_id = ""; EXPECT_TRUE(GetUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); + EXPECT_TRUE( + GetExtendedUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); // Missing file 
number SetGoodTableProperties(&tp); tp.orig_file_number = 0; EXPECT_TRUE(GetUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); + EXPECT_TRUE( + GetExtendedUniqueIdFromTableProperties(tp, &tmp).IsNotSupported()); } // This test include all the basic checks except those for index size and block diff --git a/table/unique_id.cc b/table/unique_id.cc index 95e9ded29..f23dbc5cf 100644 --- a/table/unique_id.cc +++ b/table/unique_id.cc @@ -58,7 +58,7 @@ Status DecodeSessionId(const std::string &db_session_id, uint64_t *upper, Status GetSstInternalUniqueId(const std::string &db_id, const std::string &db_session_id, - uint64_t file_number, UniqueId64x3 *out) { + uint64_t file_number, UniqueIdPtr out) { if (db_id.empty()) { return Status::NotSupported("Missing db_id"); } @@ -84,7 +84,7 @@ Status GetSstInternalUniqueId(const std::string &db_id, // that here because of testing and old versions.) // We put this first in anticipation of matching a small-ish set of cache // key prefixes to cover entries relevant to any DB. - (*out)[0] = session_lower; + out.ptr[0] = session_lower; // Hash the session upper (~39 bits entropy) and DB id (120+ bits entropy) // for very high global uniqueness entropy. @@ -97,10 +97,12 @@ Status GetSstInternalUniqueId(const std::string &db_id, // Xor in file number for guaranteed uniqueness by file number for a given // session and DB id. (Xor slightly better than + here. 
See // https://github.com/pdillinger/unique_id ) - (*out)[1] = db_a ^ file_number; + out.ptr[1] = db_a ^ file_number; // Extra (optional) global uniqueness - (*out)[2] = db_b; + if (out.extended) { + out.ptr[2] = db_b; + } return Status::OK(); } @@ -114,46 +116,63 @@ constexpr uint64_t kHiOffsetForZero = 17391078804906429400U; constexpr uint64_t kLoOffsetForZero = 6417269962128484497U; } // namespace -void InternalUniqueIdToExternal(UniqueId64x3 *in_out) { +void InternalUniqueIdToExternal(UniqueIdPtr in_out) { uint64_t hi, lo; - BijectiveHash2x64((*in_out)[1] + kHiOffsetForZero, - (*in_out)[0] + kLoOffsetForZero, &hi, &lo); - (*in_out)[0] = lo; - (*in_out)[1] = hi; - (*in_out)[2] += lo + hi; + BijectiveHash2x64(in_out.ptr[1] + kHiOffsetForZero, + in_out.ptr[0] + kLoOffsetForZero, &hi, &lo); + in_out.ptr[0] = lo; + in_out.ptr[1] = hi; + if (in_out.extended) { + in_out.ptr[2] += lo + hi; + } } -void ExternalUniqueIdToInternal(UniqueId64x3 *in_out) { - uint64_t lo = (*in_out)[0]; - uint64_t hi = (*in_out)[1]; - (*in_out)[2] -= lo + hi; +void ExternalUniqueIdToInternal(UniqueIdPtr in_out) { + uint64_t lo = in_out.ptr[0]; + uint64_t hi = in_out.ptr[1]; + if (in_out.extended) { + in_out.ptr[2] -= lo + hi; + } BijectiveUnhash2x64(hi, lo, &hi, &lo); - (*in_out)[0] = lo - kLoOffsetForZero; - (*in_out)[1] = hi - kHiOffsetForZero; + in_out.ptr[0] = lo - kLoOffsetForZero; + in_out.ptr[1] = hi - kHiOffsetForZero; } -std::string EncodeUniqueIdBytes(const UniqueId64x3 &in) { - std::string ret(24U, '\0'); - EncodeFixed64(&ret[0], in[0]); - EncodeFixed64(&ret[8], in[1]); - EncodeFixed64(&ret[16], in[2]); +std::string EncodeUniqueIdBytes(UniqueIdPtr in) { + std::string ret(in.extended ? 
24U : 16U, '\0'); + EncodeFixed64(&ret[0], in.ptr[0]); + EncodeFixed64(&ret[8], in.ptr[1]); + if (in.extended) { + EncodeFixed64(&ret[16], in.ptr[2]); + } return ret; } -Status GetUniqueIdFromTableProperties(const TableProperties &props, - std::string *out_id) { - UniqueId64x3 tmp{}; +template +Status GetUniqueIdFromTablePropertiesHelper(const TableProperties &props, + std::string *out_id) { + ID tmp{}; Status s = GetSstInternalUniqueId(props.db_id, props.db_session_id, props.orig_file_number, &tmp); if (s.ok()) { InternalUniqueIdToExternal(&tmp); - *out_id = EncodeUniqueIdBytes(tmp); + *out_id = EncodeUniqueIdBytes(&tmp); } else { out_id->clear(); } return s; } +Status GetExtendedUniqueIdFromTableProperties(const TableProperties &props, + std::string *out_id) { + return GetUniqueIdFromTablePropertiesHelper(props, out_id); +} + +Status GetUniqueIdFromTableProperties(const TableProperties &props, + std::string *out_id) { + return GetUniqueIdFromTablePropertiesHelper(props, out_id); +} + std::string UniqueIdToHumanString(const std::string &id) { // Not so efficient, but that's OK std::string str = Slice(id).ToString(/*hex*/ true); diff --git a/table/unique_id_impl.h b/table/unique_id_impl.h index 8f414f7d6..e8414448e 100644 --- a/table/unique_id_impl.h +++ b/table/unique_id_impl.h @@ -11,8 +11,34 @@ namespace ROCKSDB_NAMESPACE { +// Standard size unique ID, good enough for almost all practical purposes +using UniqueId64x2 = std::array; + +// Value never used as an actual unique ID so can be used for "null" +constexpr UniqueId64x2 kNullUniqueId64x2 = {}; + +// Extended size unique ID, for extra certainty of uniqueness among SST files +// spanning many hosts over a long time (rarely if ever needed) using UniqueId64x3 = std::array; +// Value never used as an actual unique ID so can be used for "null" +constexpr UniqueId64x3 kNullUniqueId64x3 = {}; + +// Dynamic pointer wrapper for one of the two above +struct UniqueIdPtr { + uint64_t *ptr = nullptr; + bool extended = 
false; + + /*implicit*/ UniqueIdPtr(UniqueId64x2 *id) { + ptr = (*id).data(); + extended = false; + } + /*implicit*/ UniqueIdPtr(UniqueId64x3 *id) { + ptr = (*id).data(); + extended = true; + } +}; + // Helper for GetUniqueIdFromTableProperties. This function can also be used // for temporary ids for files without sufficient information in table // properties. The internal unique id is more structured than the public @@ -21,7 +47,7 @@ using UniqueId64x3 = std::array; // is long term stable. Status GetSstInternalUniqueId(const std::string &db_id, const std::string &db_session_id, - uint64_t file_number, UniqueId64x3 *out); + uint64_t file_number, UniqueIdPtr out); // Helper for GetUniqueIdFromTableProperties. External unique ids go through // this extra hashing layer so that prefixes of the unique id have predictable @@ -29,14 +55,14 @@ Status GetSstInternalUniqueId(const std::string &db_id, // the full 192 bits. // This transformation must be long term stable to ensure // GetUniqueIdFromTableProperties is long term stable. -void InternalUniqueIdToExternal(UniqueId64x3 *in_out); +void InternalUniqueIdToExternal(UniqueIdPtr in_out); // Reverse of InternalUniqueIdToExternal mostly for testing purposes // (demonstrably 1-to-1 on the first 128 bits and on the full 192 bits). -void ExternalUniqueIdToInternal(UniqueId64x3 *in_out); +void ExternalUniqueIdToInternal(UniqueIdPtr in_out); // Convert numerical format to byte format for public API -std::string EncodeUniqueIdBytes(const UniqueId64x3 &in); +std::string EncodeUniqueIdBytes(UniqueIdPtr in); // Reformat a random value down to our "DB session id" format, // which is intended to be compact and friendly for use in file names.