From c57f914482f6a125f63cd164a8637f40e828fb7b Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 10 Nov 2020 23:41:05 -0800 Subject: [PATCH] Use NPHash64 in more places (#7632) Summary: Since the hashes should not be persisted in output_validator nor mock_env. Also updated NPHash64 to use 64-bit seed, and comments. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7632 Test Plan: make check, and new build setting that enables modification to NPHash64, to check for behavior depending on specific values. Added that setting to one of the CircleCI configurations. Reviewed By: jay-zhuang Differential Revision: D24833780 Pulled By: pdillinger fbshipit-source-id: 02a57652ccf1ac105fbca79e77875bb7bf7c071f --- .circleci/config.yml | 2 +- Makefile | 4 ++++ db/output_validator.cc | 4 ++-- db/output_validator.h | 2 ++ env/mock_env.cc | 5 ++--- util/hash.h | 19 +++++++++++++++---- util/hash_test.cc | 2 ++ 7 files changed, 28 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 63780bca3..767c64248 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -135,7 +135,7 @@ jobs: steps: - pre-steps - install-gflags - - run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j32 all check_some | .circleci/cat_ignore_eagain + - run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j32 all check_some | .circleci/cat_ignore_eagain - post-steps build-linux-release: diff --git a/Makefile b/Makefile index 29c36d61f..37ffa032e 100644 --- a/Makefile +++ b/Makefile @@ -414,6 +414,10 @@ ifdef TEST_UINT128_COMPAT PLATFORM_CCFLAGS += -DTEST_UINT128_COMPAT=1 PLATFORM_CXXFLAGS += -DTEST_UINT128_COMPAT=1 endif +ifdef ROCKSDB_MODIFY_NPHASH + PLATFORM_CCFLAGS += -DROCKSDB_MODIFY_NPHASH=1 + PLATFORM_CXXFLAGS += -DROCKSDB_MODIFY_NPHASH=1 +endif # This (the first rule) must depend on "all". default: all diff --git a/db/output_validator.cc b/db/output_validator.cc index 56b8fe59e..c36c9281e 100644 --- a/db/output_validator.cc +++ b/db/output_validator.cc @@ -9,8 +9,8 @@ namespace ROCKSDB_NAMESPACE { Status OutputValidator::Add(const Slice& key, const Slice& value) { if (enable_hash_) { // Generate a rolling 64-bit hash of the key and values - paranoid_hash_ = Hash64(key.data(), key.size(), paranoid_hash_); - paranoid_hash_ = Hash64(value.data(), value.size(), paranoid_hash_); + paranoid_hash_ = NPHash64(key.data(), key.size(), paranoid_hash_); + paranoid_hash_ = NPHash64(value.data(), value.size(), paranoid_hash_); } if (enable_order_check_) { TEST_SYNC_POINT_CALLBACK("OutputValidator::Add:order_check", diff --git a/db/output_validator.h b/db/output_validator.h index 167b25e06..c2c5f9906 100644 --- a/db/output_validator.h +++ b/db/output_validator.h @@ -34,6 +34,8 @@ class OutputValidator { } private: + // Not (yet) intended to be persisted, so subject to change + // without notice between releases. uint64_t GetHash() const { return paranoid_hash_; } const InternalKeyComparator& icmp_; diff --git a/env/mock_env.cc b/env/mock_env.cc index 44c8a01b9..d83e78ffc 100644 --- a/env/mock_env.cc +++ b/env/mock_env.cc @@ -16,7 +16,7 @@ #include "port/sys_time.h" #include "rocksdb/file_system.h" #include "util/cast_util.h" -#include "util/murmurhash.h" +#include "util/hash.h" #include "util/random.h" #include "util/rate_limiter.h" @@ -32,8 +32,7 @@ class MemFile { locked_(false), size_(0), modified_time_(Now()), - rnd_(static_cast( - MurmurHash(fn.data(), static_cast(fn.size()), 0))), + rnd_(Lower32of64(GetSliceNPHash64(fn))), fsynced_bytes_(0) {} // No copying allowed. MemFile(const MemFile&) = delete; diff --git a/util/hash.h b/util/hash.h index e184eb3e9..539b822f6 100644 --- a/util/hash.h +++ b/util/hash.h @@ -36,20 +36,31 @@ extern uint64_t Hash64(const char* data, size_t n, uint64_t seed); // Specific optimization without seed (same as seed = 0) extern uint64_t Hash64(const char* data, size_t n); -// Non-persistent hash. Must only used for in-memory data structure. -// The hash results are thus applicable to change. (Thus, it rarely makes -// sense to specify a seed for this function.) +// Non-persistent hash. Must only used for in-memory data structures. +// The hash results are thus subject to change between releases, +// architectures, build configuration, etc. (Thus, it rarely makes sense +// to specify a seed for this function, except for a "rolling" hash.) // KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent // results from previous seed. Recommend incrementing by a large odd number. -inline uint64_t NPHash64(const char* data, size_t n, uint32_t seed) { +inline uint64_t NPHash64(const char* data, size_t n, uint64_t seed) { +#ifdef ROCKSDB_MODIFY_NPHASH + // For testing "subject to change" + return Hash64(data, n, seed + 123456789); +#else // Currently same as Hash64 return Hash64(data, n, seed); +#endif } // Specific optimization without seed (same as seed = 0) inline uint64_t NPHash64(const char* data, size_t n) { +#ifdef ROCKSDB_MODIFY_NPHASH + // For testing "subject to change" + return Hash64(data, n, 123456789); +#else // Currently same as Hash64 return Hash64(data, n); +#endif } // Stable/persistent 32-bit hash. Moderate quality and high speed on diff --git a/util/hash_test.cc b/util/hash_test.cc index a6530b686..08fcaf574 100644 --- a/util/hash_test.cc +++ b/util/hash_test.cc @@ -585,6 +585,8 @@ TEST(MathTest, CodingGeneric) { } int main(int argc, char** argv) { + fprintf(stderr, "NPHash64 id: %x\n", + static_cast(ROCKSDB_NAMESPACE::GetSliceNPHash64("RocksDB"))); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS();