rocksdb/util/hash.cc
Giuseppe Ottaviano 8f927e5f75 Fix undefined behavior in Hash
Summary:
Instead of ignoring UBSan checks, fix the negative shifts in
Hash(). Also add test to make sure the hash values are stable over
time. The values were computed before this change, so the test also
verifies the correctness of the change.
Closes https://github.com/facebook/rocksdb/pull/2546

Differential Revision: D5386369

Pulled By: yiwu-arbug

fbshipit-source-id: 6de4b44461a544d6222cc5d72d8cda2c0373d17e
2017-07-10 12:29:24 -07:00

59 lines
2.0 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// This source code is also licensed under the GPLv2 license found in the
// COPYING file in the root directory of this source tree.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <string.h>
#include "util/coding.h"
#include "util/hash.h"
namespace rocksdb {

// Computes a 32-bit hash of the n bytes starting at data, mixed with seed.
//
// data: pointer to the first byte to hash; n bytes are read from it.
// n:    number of bytes to hash (may be 0, in which case no byte is read).
// seed: initial value folded into the hash state.
//
// Returns the resulting 32-bit hash value.
//
// The hash algorithm is part of the format definition, so the constants, the
// order of the mixing steps and the sign-extension of the trailing bytes must
// stay exactly as they are.
uint32_t Hash(const char* data, size_t n, uint32_t seed) {
  // Similar to murmur hash
  const uint32_t m = 0xc6a4a793;
  const uint32_t r = 24;
  const char* limit = data + n;
  uint32_t h = static_cast<uint32_t>(seed ^ (n * m));

  // Pick up four bytes at a time.
  // The remaining length is compared instead of forming `data + 4`: when fewer
  // than four bytes are left, `data + 4` would point past one-past-the-end of
  // the buffer, and merely computing such a pointer is undefined behavior.
  while (limit - data >= 4) {
    // DecodeFixed32 is declared outside this block (presumably in
    // util/coding.h); it yields the 32-bit word stored at data.
    uint32_t w = DecodeFixed32(data);
    data += 4;
    h += w;
    h *= m;
    h ^= (h >> 16);
  }

  // Pick up remaining bytes (0 to 3 of them; 0 leaves h untouched).
  switch (limit - data) {
    // Note: The original hash implementation used data[i] << shift, which
    // promotes the char to int and then performs the shift. If the char is
    // negative, the shift is undefined behavior in C++. The hash algorithm is
    // part of the format definition, so we cannot change it; to obtain the
    // same behavior in a legal way we just cast to uint32_t, which will do
    // sign-extension. To guarantee compatibility with architectures where
    // chars are unsigned we first cast the char to int8_t.
    case 3:
      h += static_cast<uint32_t>(static_cast<int8_t>(data[2])) << 16;
    // fall through
    case 2:
      h += static_cast<uint32_t>(static_cast<int8_t>(data[1])) << 8;
    // fall through
    case 1:
      h += static_cast<uint32_t>(static_cast<int8_t>(data[0]));
      h *= m;
      h ^= (h >> r);
      break;
  }
  return h;
}

}  // namespace rocksdb