a378a4c2ac
Summary: prefetch data for following block,avoid cache miss when doing crc caculate I do performance test at kunpeng-920 server(arm-v8, 64core@2.6GHz) ./db_bench --benchmarks=crc32c --block_size=500000000 before optimise : 587313.500 micros/op 1 ops/sec; 811.9 MB/s (500000000 per op) after optimise : 289248.500 micros/op 3 ops/sec; 1648.5 MB/s (500000000 per op) Pull Request resolved: https://github.com/facebook/rocksdb/pull/5773 Differential Revision: D17347339 fbshipit-source-id: bfcd74f0f0eb4b322b959be68019ddcaae1e3341
44 lines
1.6 KiB
C
44 lines
1.6 KiB
C
// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#ifndef UTIL_CRC32C_ARM64_H
|
|
#define UTIL_CRC32C_ARM64_H
|
|
|
|
#include <cinttypes>
|
|
|
|
#if defined(__aarch64__) || defined(__AARCH64__)
|
|
|
|
#ifdef __ARM_FEATURE_CRC32
|
|
#define HAVE_ARM64_CRC
|
|
#include <arm_acle.h>
|
|
#define crc32c_u8(crc, v) __crc32cb(crc, v)
|
|
#define crc32c_u16(crc, v) __crc32ch(crc, v)
|
|
#define crc32c_u32(crc, v) __crc32cw(crc, v)
|
|
#define crc32c_u64(crc, v) __crc32cd(crc, v)
|
|
#define PREF4X64L1(buffer,PREF_OFFSET, ITR) \
|
|
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
|
|
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
|
|
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
|
|
__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
|
|
|
|
#define PREF1KL1(buffer,PREF_OFFSET) \
|
|
PREF4X64L1(buffer,(PREF_OFFSET), 0) \
|
|
PREF4X64L1(buffer,(PREF_OFFSET), 4) \
|
|
PREF4X64L1(buffer,(PREF_OFFSET), 8) \
|
|
PREF4X64L1(buffer,(PREF_OFFSET), 12)
|
|
|
|
extern uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data, unsigned len);
|
|
extern uint32_t crc32c_runtime_check(void);
|
|
|
|
#ifdef __ARM_FEATURE_CRYPTO
|
|
#define HAVE_ARM64_CRYPTO
|
|
#include <arm_neon.h>
|
|
#endif // __ARM_FEATURE_CRYPTO
|
|
#endif // __ARM_FEATURE_CRC32
|
|
|
|
#endif // defined(__aarch64__) || defined(__AARCH64__)
|
|
|
|
#endif
|