rocksdb/util/murmurhash.cc

//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
/*
  Murmurhash from http://sites.google.com/site/murmurhash/

  All code is released to the public domain. For business purposes, Murmurhash
  is under the MIT license.
*/
#include "murmurhash.h"

#include <string.h>

#include "port/lang.h"

#if defined(__x86_64__)

// -------------------------------------------------------------------
//
// 64-bit hash for 64-bit platforms (64-bit variant of MurmurHash2).
//
//   key  - bytes to hash; no particular alignment is required.
//   len  - number of bytes at `key`; must be non-negative.
//   seed - mixed into the initial hash state.
//
// Returns the 64-bit hash value.
//
// Full 8-byte words are read in host byte order, so beware of endian-ness
// issues if hashes are compared across platforms.
//
// Words are loaded with memcpy instead of dereferencing a casted
// `const uint64_t *`. That keeps the load well-defined for misaligned keys
// and under strict aliasing, so no sanitizer suppression is required.

uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )
{
    const uint64_t m = 0xc6a4a7935bd1e995;
    const int r = 47;

    uint64_t h = seed ^ (len * m);

    const unsigned char * bytes = (const unsigned char *)key;
    const int nblocks = len / 8;

    // Mix 8 bytes at a time into the hash.
    for (int i = 0; i < nblocks; ++i)
    {
        uint64_t k;
        memcpy(&k, bytes, sizeof(k));
        bytes += sizeof(k);

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;
    }

    // Fold in the 1..7 trailing bytes, byte i landing at bit offset 8*i.
    // The extra multiply only happens when there is a tail at all.
    const int tail = len & 7;

    if (tail != 0)
    {
        for (int i = 0; i < tail; ++i)
        {
            h ^= ((uint64_t)bytes[i]) << (8 * i);
        }
        h *= m;
    }

    // Final avalanche.
    h ^= h >> r;
    h *= m;
    h ^= h >> r;

    return h;
}

#elif defined(__i386__)

// -------------------------------------------------------------------
//
// 32-bit MurmurHash2.
//
//   key  - bytes to hash; no particular alignment is required.
//   len  - number of bytes at `key`; values <= 0 hash no input bytes.
//   seed - mixed into the initial hash state.
//
// Returns the hash value.
//
// Note - This code makes one assumption about how your machine behaves -
//
// 1. sizeof(int) == 4
//
// And it has a few limitations -
//
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines.

unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.

    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value

    unsigned int h = seed ^ len;

    // Mix 4 bytes at a time into the hash

    const unsigned char * data = (const unsigned char *)key;

    while(len >= 4)
    {
        // Load the word in host byte order with memcpy: unlike dereferencing a
        // casted `unsigned int *`, this is well-defined for misaligned keys and
        // does not cast away const.
        unsigned int k;
        memcpy(&k, data, sizeof(k));

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Handle the last few bytes of the input array: byte i of the 1..3
    // leftover bytes lands at bit offset 8*i, followed by one extra multiply.

    if (len > 0)
    {
        for (int i = 0; i < len; ++i)
        {
            h ^= ((unsigned int)data[i]) << (8 * i);
        }
        h *= m;
    }

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

#else

// -------------------------------------------------------------------
//
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.
//
//   key  - bytes to hash; read one byte at a time, so any alignment works.
//   len  - number of bytes at `key`; values <= 0 hash no input bytes.
//   seed - mixed into the initial hash state.
//
// Returns the hash value.

unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed )
{
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    unsigned int h = seed ^ len;

    const unsigned char * data = (const unsigned char *)key;

    while(len >= 4)
    {
        // Assemble the word least-significant byte first. Each byte is widened
        // to unsigned int before shifting: a bare `data[3] << 24` promotes to
        // signed int and overflows (undefined behavior) for bytes >= 0x80.
        unsigned int k;

        k  = (unsigned int)data[0];
        k |= (unsigned int)data[1] << 8;
        k |= (unsigned int)data[2] << 16;
        k |= (unsigned int)data[3] << 24;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Fold in the 1..3 leftover bytes, byte i landing at bit offset 8*i,
    // followed by one extra multiply. Nothing happens when no bytes remain.
    if (len > 0)
    {
        for (int i = 0; i < len; ++i)
        {
            h ^= ((unsigned int)data[i]) << (8 * i);
        }
        h *= m;
    }

    // Final mixing so the last bytes are well incorporated.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

#endif
Updated all copyright headers to the new format. 2016-02-10 00:12:00 +01:00			`// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.`
Change RocksDB License Summary: Closes https://github.com/facebook/rocksdb/pull/2589 Differential Revision: D5431502 Pulled By: siying fbshipit-source-id: 8ebf8c87883daa9daa54b2303d11ce01ab1f6f75 2017-07-16 01:03:42 +02:00			`// This source code is licensed under both the GPLv2 (found in the`
			`// COPYING file in the root directory) and Apache 2.0 License`
			`// (found in the LICENSE.Apache file in the root directory).`
Add appropriate LICENSE and Copyright message. Summary: Add appropriate LICENSE and Copyright message. Test Plan: make check Reviewers: CC: Task ID: # Blame Rev: 2013-10-16 23:59:46 +02:00			`//`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 2012-10-04 01:35:53 +02:00			`/*`
			`Murmurhash from http://sites.google.com/site/murmurhash/`

Optimize for serial commits in 2PC Summary: Throughput: 46k tps in our sysbench settings (filling the details later) The idea is to have the simplest change that gives us a reasonable boost in 2PC throughput. Major design changes: 1. The WAL file internal buffer is not flushed after each write. Instead it is flushed before critical operations (WAL copy via fs) or when FlushWAL is called by MySQL. Flushing the WAL buffer is also protected via mutex_. 2. Use two sequence numbers: last seq, and last seq for write. Last seq is the last visible sequence number for reads. Last seq for write is the next sequence number that should be used to write to WAL/memtable. This allows to have a memtable write be in parallel to WAL writes. 3. BatchGroup is not used for writes. This means that we can have parallel writers which changes a major assumption in the code base. To accommodate for that i) allow only 1 WriteImpl that intends to write to memtable via mem_mutex_--which is fine since in 2PC almost all of the memtable writes come via group commit phase which is serial anyway, ii) make all the parts in the code base that assumed to be the only writer (via EnterUnbatched) to also acquire mem_mutex_, iii) stat updates are protected via a stat_mutex_. Note: the first commit has the approach figured out but is not clean. Submitting the PR anyway to get the early feedback on the approach. If we are ok with the approach I will go ahead with this updates: 0) Rebase with Yi's pipelining changes 1) Currently batching is disabled by default to make sure that it will be consistent with all unit tests. Will make this optional via a config. 2) A couple of unit tests are disabled. They need to be updated with the serial commit of 2PC taken into account. 3) Replacing BatchGroup with mem_mutex_ got a bit ugly as it requires releasing mutex_ beforehand (the same way EnterUnbatched does). This needs to be cleaned up. 
Closes https://github.com/facebook/rocksdb/pull/2345 Differential Revision: D5210732 Pulled By: maysamyabandeh fbshipit-source-id: 78653bd95a35cd1e831e555e0e57bdfd695355a4 2017-06-24 23:06:43 +02:00			`All code is released to the public domain. For business purposes, Murmurhash`
			`is under the MIT license.`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 2012-10-04 01:35:53 +02:00			`*/`
			`#include "murmurhash.h"`
C++20 compatibility (#6697) Summary: Based on https://github.com/facebook/rocksdb/issues/6648 (CLA Signed), but heavily modified / extended: * Implicit capture of this via [=] deprecated in C++20, and [=,this] not standard before C++20 -> now using explicit capture lists * Implicit copy operator deprecated in gcc 9 -> add explicit '= default' definition * std::random_shuffle deprecated in C++17 and removed in C++20 -> migrated to a replacement in RocksDB random.h API * Add the ability to build with different std version though -DCMAKE_CXX_STANDARD=11/14/17/20 on the cmake command line * Minimal rebuild flag of MSVC is deprecated and is forbidden with /std:c++latest (C++20) * Added MSVC 2019 C++11 & MSVC 2019 C++20 in AppVeyor * Added GCC 9 C++11 & GCC9 C++20 in Travis Pull Request resolved: https://github.com/facebook/rocksdb/pull/6697 Test Plan: make check and CI Reviewed By: cheng-chang Differential Revision: D21020318 Pulled By: pdillinger fbshipit-source-id: 12311be5dbd8675a0e2c817f7ec50fa11c18ab91 2020-04-20 22:21:34 +02:00			`#include "port/lang.h"`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 2012-10-04 01:35:53 +02:00
			`#if defined(__x86_64__)`

			`// -------------------------------------------------------------------`
			`//`
			`// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment`
			`// and endian-ness issues if used across multiple platforms.`
			`//`
			`// 64-bit hash for 64-bit platforms`

Suppress UBSAN error in finer guanularity Summary: Now we suppress alignment UBSAN error as a whole. Suppressing 3-way CRC and murmurhash feels a better idea than turning off alignment check as a whole. Closes https://github.com/facebook/rocksdb/pull/3495 Differential Revision: D6971273 Pulled By: siying fbshipit-source-id: 080b59fed6df494b9f622ef7cb5d42d39e6a8cdf 2018-02-13 21:05:36 +01:00			`#ifdef ROCKSDB_UBSAN_RUN`
			`#if defined(__clang__)`
			`__attribute__((__no_sanitize__("alignment")))`
			`#elif defined(__GNUC__)`
			`__attribute__((__no_sanitize_undefined__))`
			`#endif`
			`#endif`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 2012-10-04 01:35:53 +02:00			`uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )`
			`{`
			`const uint64_t m = 0xc6a4a7935bd1e995;`
			`const int r = 47;`

			`uint64_t h = seed ^ (len * m);`

			`const uint64_t * data = (const uint64_t *)key;`
			`const uint64_t * end = data + (len/8);`

			`while(data != end)`
			`{`
			`uint64_t k = *data++;`

			`k *= m;`
			`k ^= k >> r;`
			`k *= m;`

			`h ^= k;`
			`h *= m;`
			`}`

			`const unsigned char * data2 = (const unsigned char*)data;`

			`switch(len & 7)`
			`{`
Add GCC 8 to Travis (#3433) Summary: - Avoid `strdup` to use jemalloc on Windows - Use `size_t` for consistency - Add GCC 8 to Travis - Add CMAKE_BUILD_TYPE=Release to Travis Pull Request resolved: https://github.com/facebook/rocksdb/pull/3433 Differential Revision: D6837948 Pulled By: sagar0 fbshipit-source-id: b8543c3a4da9cd07ee9a33f9f4623188e233261f 2018-07-13 19:47:49 +02:00			`case 7: h ^= ((uint64_t)data2[6]) << 48; FALLTHROUGH_INTENDED;`
			`case 6: h ^= ((uint64_t)data2[5]) << 40; FALLTHROUGH_INTENDED;`
			`case 5: h ^= ((uint64_t)data2[4]) << 32; FALLTHROUGH_INTENDED;`
			`case 4: h ^= ((uint64_t)data2[3]) << 24; FALLTHROUGH_INTENDED;`
			`case 3: h ^= ((uint64_t)data2[2]) << 16; FALLTHROUGH_INTENDED;`
			`case 2: h ^= ((uint64_t)data2[1]) << 8; FALLTHROUGH_INTENDED;`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 2012-10-04 01:35:53 +02:00			`case 1: h ^= ((uint64_t)data2[0]);`
			`h *= m;`
			`};`

			`h ^= h >> r;`
			`h *= m;`
			`h ^= h >> r;`

			`return h;`
			`}`

			`#elif defined(__i386__)`

			`// -------------------------------------------------------------------`
			`//`
			`// Note - This code makes a few assumptions about how your machine behaves -`
			`//`
			`// 1. We can read a 4-byte value from any address without crashing`
			`// 2. sizeof(int) == 4`
			`//`
			`// And it has a few limitations -`
			`//`
			`// 1. It will not work incrementally.`
			`// 2. It will not produce the same results on little-endian and big-endian`
			`// machines.`

			`unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )`
			`{`
			`// 'm' and 'r' are mixing constants generated offline.`
			`// They're not really 'magic', they just happen to work well.`

			`const unsigned int m = 0x5bd1e995;`
			`const int r = 24;`

			`// Initialize the hash to a 'random' value`

			`unsigned int h = seed ^ len;`

			`// Mix 4 bytes at a time into the hash`

			`const unsigned char * data = (const unsigned char *)key;`

			`while(len >= 4)`
			`{`
			`unsigned int k = *(unsigned int *)data;`

			`k *= m;`
			`k ^= k >> r;`
			`k *= m;`

			`h *= m;`
			`h ^= k;`

			`data += 4;`
			`len -= 4;`
			`}`

			`// Handle the last few bytes of the input array`

			`switch(len)`
			`{`
Add GCC 8 to Travis (#3433) Summary: - Avoid `strdup` to use jemalloc on Windows - Use `size_t` for consistency - Add GCC 8 to Travis - Add CMAKE_BUILD_TYPE=Release to Travis Pull Request resolved: https://github.com/facebook/rocksdb/pull/3433 Differential Revision: D6837948 Pulled By: sagar0 fbshipit-source-id: b8543c3a4da9cd07ee9a33f9f4623188e233261f 2018-07-13 19:47:49 +02:00			`case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED;`
			`case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED;`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 2012-10-04 01:35:53 +02:00			`case 1: h ^= data[0];`
			`h *= m;`
			`};`

			`// Do a few final mixes of the hash to ensure the last few`
			`// bytes are well-incorporated.`

			`h ^= h >> 13;`
			`h *= m;`
			`h ^= h >> 15;`

			`return h;`
			`}`

			`#else`

			`// -------------------------------------------------------------------`
			`//`
			`// Same as MurmurHash2, but endian- and alignment-neutral.`
			`// Half the speed though, alas.`

			`unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed )`
			`{`
			`const unsigned int m = 0x5bd1e995;`
			`const int r = 24;`

			`unsigned int h = seed ^ len;`

			`const unsigned char * data = (const unsigned char *)key;`

			`while(len >= 4)`
			`{`
			`unsigned int k;`

			`k = data[0];`
			`k |= data[1] << 8;`
			`k |= data[2] << 16;`
			`k |= data[3] << 24;`

			`k *= m;`
			`k ^= k >> r;`
			`k *= m;`

			`h *= m;`
			`h ^= k;`

			`data += 4;`
			`len -= 4;`
			`}`

			`switch(len)`
			`{`
Add GCC 8 to Travis (#3433) Summary: - Avoid `strdup` to use jemalloc on Windows - Use `size_t` for consistency - Add GCC 8 to Travis - Add CMAKE_BUILD_TYPE=Release to Travis Pull Request resolved: https://github.com/facebook/rocksdb/pull/3433 Differential Revision: D6837948 Pulled By: sagar0 fbshipit-source-id: b8543c3a4da9cd07ee9a33f9f4623188e233261f 2018-07-13 19:47:49 +02:00			`case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED;`
			`case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED;`
Implement RowLocks for assoc schema Summary: Each assoc is identified by (id1, assocType). This is the rowkey. Each row has a read/write rowlock. There is statically allocated array of 2000 read/write locks. A rowkey is murmur-hashed to one of the read/write locks. assocPut and assocDelete acquires the rowlock in Write mode. The key-updates are done within the rowlock with a atomic nosync batch write to leveldb. Then the rowlock is released and a write-with-sync is done to sync leveldb transaction log. Test Plan: added unit test Reviewers: heyongqiang Reviewed By: heyongqiang Differential Revision: https://reviews.facebook.net/D5859 2012-10-04 01:35:53 +02:00			`case 1: h ^= data[0];`
			`h *= m;`
			`};`

			`h ^= h >> 13;`
			`h *= m;`
			`h ^= h >> 15;`

			`return h;`
			`}`

			`#endif`