Implementation of Crc32c combine function (#8305)
Summary: Implement a function that computes the crc32c of the concatenation of two strings from their individual checksums. Given string 1 (s1) with crc32c checksum crc32c_1 and string 2 (s2) with crc32c checksum crc32c_2, the new string is s1+s2 and its checksum is crc32c_new=Crc32cCombine(crc32c_1, crc32c_2, s2.size). Pull Request resolved: https://github.com/facebook/rocksdb/pull/8305 Test Plan: make check, added new test cases Reviewed By: pdillinger Differential Revision: D28651665 Pulled By: zhichao-cao fbshipit-source-id: c84116108388f11a81f6a217b49f99c70d4ffacf
This commit is contained in:
parent
d5bd0039b9
commit
ecccc63179
166
util/crc32c.cc
166
util/crc32c.cc
@ -10,15 +10,20 @@
|
||||
// A portable implementation of crc32c, optimized to handle
|
||||
// four bytes at a time.
|
||||
#include "util/crc32c.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <array>
|
||||
#include <utility>
|
||||
#ifdef HAVE_SSE42
|
||||
#include <nmmintrin.h>
|
||||
#include <wmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "port/lang.h"
|
||||
#include "util/coding.h"
|
||||
|
||||
#include "util/crc32c_arm64.h"
|
||||
#include "util/math.h"
|
||||
|
||||
#ifdef __powerpc64__
|
||||
#include "util/crc32c_ppc.h"
|
||||
@ -1279,5 +1284,164 @@ uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
|
||||
}
|
||||
|
||||
|
||||
// The code for crc32c combine, copied with permission from folly
|
||||
|
||||
// Recursive worker for the standard Galois-field (carry-less) multiply of
// a and b, reduced by m. The only twist relative to the textbook form is
// that a, b, m, and p are all bit-reflected, so bit 31 is the bit examined
// first in b and bit 0 is the bit shifted out of a.
//
// https://en.wikipedia.org/wiki/Finite_field_arithmetic
//
//   i - number of rounds already performed; recursion stops when i == 32
//   p - partial product accumulated so far (returned once i == 32)
//   a - multiplicand, advanced one position (and reduced by m) per round
//   b - multiplier, consumed from its top bit downwards
//   m - reduction constant xor-ed into a whenever its low bit is shifted out
//
// The body is one return expression with recursion instead of a loop, which
// keeps it usable where constexpr functions are limited to a single return.
static constexpr uint32_t gf_multiply_sw_1(
    size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) {
  // (0u - bit) is all-ones when bit == 1 and zero otherwise, so each `&`
  // below acts as a branch-free conditional.
  return (i != 32)
             ? gf_multiply_sw_1(
                   i + 1,
                   p ^ (a & (0u - (b >> 31))),         // add a if top bit of b set
                   ((0u - (a & 1u)) & m) ^ (a >> 1),   // advance a, reduce by m
                   b << 1,                             // expose next bit of b
                   m)
             : p;
}
|
||||
static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) {
|
||||
return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m);
|
||||
}
|
||||
|
||||
static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
|
||||
return gf_multiply_sw(a, a, m);
|
||||
}
|
||||
|
||||
// Compile-time table entry: gf_powers_memo<i, m>::value is m squared i times
// under the Galois-field multiply (reduced by m itself). Each entry is
// obtained by multiplying the previous entry with itself.
template <size_t i, uint32_t m>
struct gf_powers_memo {
  static constexpr uint32_t value = gf_multiply_sw(
      gf_powers_memo<i - 1, m>::value, gf_powers_memo<i - 1, m>::value, m);
};

// Base case that terminates the recursion: entry zero is m itself.
template <uint32_t m>
struct gf_powers_memo<0, m> {
  static constexpr uint32_t value = m;
};
|
||||
|
||||
// Empty tag type that carries a compile-time list of integers `Ints` of type
// T in its template arguments (presumably a local stand-in for the C++14
// std::integer_sequence — confirm against the project's language level).
template <typename T, T... Ints>
struct integer_sequence {
  using value_type = T;

  // Number of integers held in the pack.
  static constexpr size_t size() { return sizeof...(Ints); }
};
|
||||
|
||||
template <typename T, std::size_t N, T... Is>
|
||||
struct make_integer_sequence : make_integer_sequence<T, N - 1, N - 1, Is...> {};
|
||||
|
||||
template <typename T, T... Is>
|
||||
struct make_integer_sequence<T, 0, Is...> : integer_sequence<T, Is...> {};
|
||||
|
||||
template <std::size_t N>
|
||||
using make_index_sequence = make_integer_sequence<std::size_t, N>;
|
||||
|
||||
template <uint32_t m>
|
||||
struct gf_powers_make {
|
||||
template <size_t... i>
|
||||
using index_sequence = integer_sequence<size_t, i...>;
|
||||
template <size_t... i>
|
||||
constexpr std::array<uint32_t, sizeof...(i)> operator()(
|
||||
index_sequence<i...>) const {
|
||||
return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
|
||||
}
|
||||
};
|
||||
|
||||
// The crc32c (Castagnoli) polynomial in the bit-reflected form that the
// Galois-field helpers in this file operate on.
static constexpr uint32_t crc32c_m = 0x82F63B78u;
|
||||
|
||||
static constexpr std::array<uint32_t, 62> const crc32c_powers =
|
||||
gf_powers_make<crc32c_m>{}(make_index_sequence<62>{});
|
||||
|
||||
// Expects a "pure" crc (see Crc32cCombine)
|
||||
static uint32_t Crc32AppendZeroes(
|
||||
uint32_t crc, size_t len_over_4, uint32_t polynomial,
|
||||
std::array<uint32_t, 62> const& powers_array) {
|
||||
auto powers = powers_array.data();
|
||||
// Append by multiplying by consecutive powers of two of the zeroes
|
||||
// array
|
||||
size_t len_bits = len_over_4;
|
||||
|
||||
while (len_bits) {
|
||||
// Advance directly to next bit set.
|
||||
auto r = CountTrailingZeroBits(len_bits);
|
||||
len_bits >>= r;
|
||||
powers += r;
|
||||
|
||||
crc = gf_multiply_sw(crc, *powers, polynomial);
|
||||
|
||||
len_bits >>= 1;
|
||||
powers++;
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
// Converts a crc with all bits inverted into its "pure" form by flipping
// every bit.
static inline uint32_t InvertedToPure(uint32_t crc) {
  return crc ^ 0xffffffffu;
}
|
||||
|
||||
// Converts a "pure" crc into the all-bits-inverted form by flipping every
// bit.
static inline uint32_t PureToInverted(uint32_t crc) {
  return crc ^ 0xffffffffu;
}
|
||||
|
||||
static inline uint32_t PureExtend(uint32_t crc, const char* buf, size_t size) {
|
||||
return InvertedToPure(Extend(PureToInverted(crc), buf, size));
|
||||
}
|
||||
|
||||
// Background:
|
||||
// RocksDB uses two kinds of crc32c values: masked and unmasked. Neither is
|
||||
// a "pure" CRC because a pure CRC satisfies (^ for xor)
|
||||
// crc(a ^ b) = crc(a) ^ crc(b)
|
||||
// The unmasked is closest, and this function takes unmasked crc32c values.
|
||||
// The unmasked values are impure in two ways:
|
||||
// * The initial setting at the start of CRC computation is all 1 bits
|
||||
// (like -1) instead of zero.
|
||||
// * The result has all bits inverted.
|
||||
// Note that together, these result in the empty string having a crc32c of
|
||||
// zero. See
|
||||
// https://en.wikipedia.org/wiki/Computation_of_cyclic_redundancy_checks#CRC_variants
|
||||
//
|
||||
// Simplified version of strategy, using xor through pure CRCs (+ for concat):
|
||||
//
|
||||
// pure_crc(str1 + str2) = pure_crc(str1 + zeros(len(str2))) ^
|
||||
// pure_crc(zeros(len(str1)) + str2)
|
||||
//
|
||||
// because the xor of these two zero-padded strings is str1 + str2. For pure
|
||||
// CRC, leading zeros don't affect the result, so we only need
|
||||
//
|
||||
// pure_crc(str1 + str2) = pure_crc(str1 + zeros(len(str2))) ^
|
||||
// pure_crc(str2)
|
||||
//
|
||||
// Considering we aren't working with pure CRCs, what is actually in the input?
|
||||
//
|
||||
// crc1 = PureToInverted(PureExtendCrc32c(-1, zeros, crc1len) ^
|
||||
// PureCrc32c(str1, crc1len))
|
||||
// crc2 = PureToInverted(PureExtendCrc32c(-1, zeros, crc2len) ^
|
||||
// PureCrc32c(str2, crc2len))
|
||||
//
|
||||
// The result we want to compute is
|
||||
// combined = PureToInverted(PureExtendCrc32c(PureExtendCrc32c(-1, zeros,
|
||||
// crc1len) ^
|
||||
// PureCrc32c(str1, crc1len),
|
||||
// zeros, crc2len) ^
|
||||
// PureCrc32c(str2, crc2len))
|
||||
//
|
||||
// Thus, in addition to extending crc1 over the length of str2 in (virtual)
|
||||
// zeros, we need to cancel out the -1 initializer that was used in computing
|
||||
// crc2. To cancel it out, we also need to extend it over crc2len in zeros.
|
||||
// To simplify, since the end of str1 and that -1 initializer for crc2 are at
|
||||
// the same logical position, we can combine them before we extend over the
|
||||
// zeros.
|
||||
uint32_t Crc32cCombine(uint32_t crc1, uint32_t crc2, size_t crc2len) {
|
||||
uint32_t pure_crc1_with_init = InvertedToPure(crc1);
|
||||
uint32_t pure_crc2_with_init = InvertedToPure(crc2);
|
||||
uint32_t pure_crc2_init = static_cast<uint32_t>(-1);
|
||||
|
||||
// Append up to 32 bits of zeroes in the normal way
|
||||
char zeros[4] = {0, 0, 0, 0};
|
||||
auto len = crc2len & 3;
|
||||
uint32_t tmp = pure_crc1_with_init ^ pure_crc2_init;
|
||||
if (len) {
|
||||
tmp = PureExtend(tmp, zeros, len);
|
||||
}
|
||||
return PureToInverted(
|
||||
Crc32AppendZeroes(tmp, crc2len / 4, crc32c_m, crc32c_powers) ^
|
||||
pure_crc2_with_init);
|
||||
}
|
||||
|
||||
} // namespace crc32c
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
@ -24,6 +24,12 @@ extern std::string IsFastCrc32Supported();
|
||||
// crc32c of a stream of data.
|
||||
extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n);
|
||||
|
||||
// Takes two unmasked crc32c values, and the length of the string from
|
||||
// which `crc2` was computed, and computes a crc32c value for the
|
||||
// concatenation of the original two input strings. Running time is
|
||||
// ~ log(crc2len).
|
||||
extern uint32_t Crc32cCombine(uint32_t crc1, uint32_t crc2, size_t crc2len);
|
||||
|
||||
// Return the crc32c of data[0,n-1]
|
||||
inline uint32_t Value(const char* data, size_t n) {
|
||||
return Extend(0, data, n);
|
||||
|
@ -7,8 +7,10 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
#include "util/crc32c.h"
|
||||
|
||||
#include "test_util/testharness.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/random.h"
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
namespace crc32c {
|
||||
@ -137,6 +139,51 @@ TEST(CRC, Mask) {
|
||||
ASSERT_EQ(crc, Unmask(Unmask(Mask(Mask(crc)))));
|
||||
}
|
||||
|
||||
// Combining the checksums of "hello " and "world" must reproduce the
// checksum of "hello world" computed directly.
TEST(CRC, Crc32cCombineBasicTest) {
  const uint32_t head_crc = Value("hello ", 6);
  const uint32_t tail_crc = Value("world", 5);
  const uint32_t crc3 = Value("hello world", 11);

  const uint32_t crc1_2_combine = Crc32cCombine(head_crc, tail_crc, 5);
  ASSERT_EQ(crc3, crc1_2_combine);
}
|
||||
|
||||
// Combining the two parts in swapped order ("world" followed by "hello ")
// must not yield the checksum of "hello world".
TEST(CRC, Crc32cCombineOrderMattersTest) {
  const uint32_t hello_crc = Value("hello ", 6);
  const uint32_t world_crc = Value("world", 5);
  const uint32_t crc3 = Value("hello world", 11);

  const uint32_t crc2_1_combine = Crc32cCombine(world_crc, hello_crc, 6);
  ASSERT_NE(crc3, crc2_1_combine);
}
|
||||
|
||||
// For a fixed random 1 MiB prefix and a random suffix of every length from
// 0 through 4095 bytes, Crc32cCombine must agree with extending the prefix
// checksum over the suffix data via Extend().
TEST(CRC, Crc32cCombineFullCoverTest) {
  const int kSuffixLenLimit = 4 * 1024;  // exclusive upper bound
  Random rnd(test::RandomSeed());

  const int kPrefixLen = 1024 * 1024;
  const std::string prefix = rnd.RandomBinaryString(kPrefixLen);
  const uint32_t crc1 = Value(prefix.data(), kPrefixLen);

  for (int suffix_len = 0; suffix_len < kSuffixLenLimit; ++suffix_len) {
    const std::string suffix = rnd.RandomBinaryString(suffix_len);
    const uint32_t crc2 = Value(suffix.data(), suffix.size());
    const uint32_t crc1_2 = Extend(crc1, suffix.data(), suffix.size());
    const uint32_t crc1_2_combine = Crc32cCombine(crc1, crc2, suffix_len);
    ASSERT_EQ(crc1_2, crc1_2_combine);
  }
}
|
||||
|
||||
// Same agreement check as the full-cover test, but with one large suffix of
// 16 MiB - 1 bytes appended to a random 1 MiB prefix.
TEST(CRC, Crc32cCombineBigSizeTest) {
  Random rnd(test::RandomSeed());

  const int kHeadLen = 1024 * 1024;
  const std::string head = rnd.RandomBinaryString(kHeadLen);
  const uint32_t crc1 = Value(head.data(), kHeadLen);

  const int kTailLen = 16 * 1024 * 1024 - 1;
  const std::string tail = rnd.RandomBinaryString(kTailLen);
  const uint32_t crc2 = Value(tail.data(), tail.size());

  const uint32_t crc1_2 = Extend(crc1, tail.data(), tail.size());
  const uint32_t crc1_2_combine = Crc32cCombine(crc1, crc2, kTailLen);
  ASSERT_EQ(crc1_2, crc1_2_combine);
}
|
||||
|
||||
} // namespace crc32c
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user