8f927e5f75
Summary: Instead of ignoring UBSan checks, fix the negative shifts in Hash(). Also add test to make sure the hash values are stable over time. The values were computed before this change, so the test also verifies the correctness of the change. Closes https://github.com/facebook/rocksdb/pull/2546 Differential Revision: D5386369 Pulled By: yiwu-arbug fbshipit-source-id: 6de4b44461a544d6222cc5d72d8cda2c0373d17e
59 lines
2.0 KiB
C++
59 lines
2.0 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
// This source code is also licensed under the GPLv2 license found in the
|
|
// COPYING file in the root directory of this source tree.
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include <string.h>
|
|
#include "util/coding.h"
|
|
#include "util/hash.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
uint32_t Hash(const char* data, size_t n, uint32_t seed) {
|
|
// Similar to murmur hash
|
|
const uint32_t m = 0xc6a4a793;
|
|
const uint32_t r = 24;
|
|
const char* limit = data + n;
|
|
uint32_t h = static_cast<uint32_t>(seed ^ (n * m));
|
|
|
|
// Pick up four bytes at a time
|
|
while (data + 4 <= limit) {
|
|
uint32_t w = DecodeFixed32(data);
|
|
data += 4;
|
|
h += w;
|
|
h *= m;
|
|
h ^= (h >> 16);
|
|
}
|
|
|
|
// Pick up remaining bytes
|
|
switch (limit - data) {
|
|
// Note: The original hash implementation used data[i] << shift, which
|
|
// promotes the char to int and then performs the shift. If the char is
|
|
// negative, the shift is undefined behavior in C++. The hash algorithm is
|
|
// part of the format definition, so we cannot change it; to obtain the same
|
|
// behavior in a legal way we just cast to uint32_t, which will do
|
|
// sign-extension. To guarantee compatibility with architectures where chars
|
|
// are unsigned we first cast the char to int8_t.
|
|
case 3:
|
|
h += static_cast<uint32_t>(static_cast<int8_t>(data[2])) << 16;
|
|
// fall through
|
|
case 2:
|
|
h += static_cast<uint32_t>(static_cast<int8_t>(data[1])) << 8;
|
|
// fall through
|
|
case 1:
|
|
h += static_cast<uint32_t>(static_cast<int8_t>(data[0]));
|
|
h *= m;
|
|
h ^= (h >> r);
|
|
break;
|
|
}
|
|
return h;
|
|
}
|
|
|
|
} // namespace rocksdb
|