From b31ba89c3141ee0741037c0c1638227f88b79e21 Mon Sep 17 00:00:00 2001
From: Krunal Bauskar
Date: Fri, 27 Mar 2020 09:06:27 +0000
Subject: [PATCH] - Using CRC32 ARM intrinsic calls to calculate checksum.

- There are 2 types of checksum: crc32 and crc32c.
  - crc32 is the traditional crc32 found in most zip utilities.
  - crc32c (crc32 Castagnoli) uses a different polynomial, and new-generation
    platforms can compute a full 32-bit crc32c in 3 cycles.

- MySQL uses both of them. crc32 is used for calculating table and binlog
  checksums and crc32c is used by InnoDB for the page checksum.

- ARM ACLE added intrinsic support for both crc32 variants.
  (As per my reading I haven't seen support for crc32 on x86-sse;
   x86-sse has crc32c support.)

- Currently MySQL calculates crc32 using zlib (a software-based approach).
  This patch helps optimize the use of crc32 (on ARM) by leveraging the
  corresponding ARM ACLE supported crc32 variants (crc32[b|h|w|d]).
---
 config.h.cmake                         |  3 ++
 configure.cmake                        | 56 +++++++++++++++++++++++++
 libbinlogevents/include/binlog_event.h |  4 +-
 mysys/checksum.cc                      | 57 ++++++++++++++++++++++++++
 4 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/config.h.cmake b/config.h.cmake
index f82ac828b57..da92bfea6e4 100644
--- a/config.h.cmake
+++ b/config.h.cmake
@@ -361,4 +361,7 @@
 
 #define SO_EXT "@CMAKE_SHARED_MODULE_SUFFIX@"
 
+/* ARM crc32 support */
+#cmakedefine HAVE_ARMV8_CRC32_INTRINSIC @HAVE_ARMV8_CRC32_INTRINSIC@
+
 #endif
diff --git a/configure.cmake b/configure.cmake
index dc54abf7f6f..1d09dd3ed13 100644
--- a/configure.cmake
+++ b/configure.cmake
@@ -770,3 +770,59 @@ IF(HAVE_LIBNUMA AND NOT WITH_NUMA)
    SET(HAVE_LIBNUMA 0)
    MESSAGE(STATUS "Disabling NUMA on user's request")
 ENDIF()
+
+# check for intrinsic crc32 support on arm
+IF(LINUX)
+  IF (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+    CHECK_INCLUDE_FILES(arm_acle.h HAVE_ACLE_H)
+
+    IF (HAVE_ACLE_H)
+
+      # CRC implementation is optional for ARMv8-A (alias to AARCH64)
+      # but mandatory for ARMv8.1-A onwards.
+      # Ideally, if no -march is provided, the compiler should use the
+      # native machine setting, but it doesn't.
+      # This means that even if the build machine is based on ARMv8.1,
+      # the compiler fails to expose crc32 unless a target architecture is set.
+
+      # Hopefully this will be fixed in the compiler in due course, but until
+      # then we check whether the compiler exposes crc32 without a target
+      # architecture and, if not, fall back to setting one explicitly.
+
+      CHECK_CXX_SOURCE_COMPILES(
+      "
+      #include <arm_acle.h>
+      int main() {
+        __crc32cb(0, 0); __crc32ch(0, 0); __crc32cw(0, 0); __crc32cd(0, 0);
+        __crc32b(0, 0); __crc32h(0, 0); __crc32w(0, 0); __crc32d(0, 0);
+        return 0;
+      }"
+      HAVE_ARMV8_CRC32)
+
+      IF (NOT HAVE_ARMV8_CRC32)
+        SET(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+        SET(CMAKE_REQUIRED_FLAGS "-march=armv8-a+crc")
+        CHECK_CXX_SOURCE_COMPILES(
+        "
+        #include <arm_acle.h>
+        int main() {
+          __crc32cb(0, 0); __crc32ch(0, 0); __crc32cw(0, 0); __crc32cd(0, 0);
+          __crc32b(0, 0); __crc32h(0, 0); __crc32w(0, 0); __crc32d(0, 0);
+          return 0;
+        }"
+        HAVE_ARMV8_CRC32_WITH_ARCH_EXTN)
+        SET(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
+        IF (HAVE_ARMV8_CRC32_WITH_ARCH_EXTN)
+          STRING_APPEND(CMAKE_CXX_FLAGS " -march=armv8-a+crc")
+        ENDIF()
+      ENDIF()
+
+      IF (HAVE_ARMV8_CRC32 OR HAVE_ARMV8_CRC32_WITH_ARCH_EXTN)
+        MESSAGE(STATUS "ARMv8 crc32 intrinsic support available")
+        SET(HAVE_ARMV8_CRC32_INTRINSIC 1)
+      ENDIF()
+
+    ENDIF() # arm_acle.h
+  ENDIF() # aarch64
+ENDIF() # linux
+
diff --git a/libbinlogevents/include/binlog_event.h b/libbinlogevents/include/binlog_event.h
index 5bb4f006987..32b276d0592 100644
--- a/libbinlogevents/include/binlog_event.h
+++ b/libbinlogevents/include/binlog_event.h
@@ -45,6 +45,7 @@
 #include "debug_vars.h"
 #include "event_reader.h"
 #include "my_io.h"
+#include "my_sys.h"
 
 #if defined(_WIN32)
 #include <Winsock2.h>
@@ -443,8 +444,7 @@ enum enum_binlog_checksum_alg {
 inline uint32_t checksum_crc32(uint32_t crc, const unsigned char *pos,
                                size_t length) {
   BAPI_ASSERT(length <= UINT_MAX);
-  return static_cast<uint32_t>(crc32(static_cast<unsigned int>(crc), pos,
-                                     static_cast<unsigned int>(length)));
+  return my_checksum(crc, pos, length);
 }
 
 /*
diff --git a/mysys/checksum.cc b/mysys/checksum.cc
index 44784317a10..4f0de548e4d 100644
--- a/mysys/checksum.cc
+++ b/mysys/checksum.cc
@@ -36,6 +36,59 @@
 #include "my_inttypes.h"
 #include "my_sys.h"
 
+/* HAVE_ARMV8_CRC32_INTRINSIC is defined for linux + aarch64 + crc32 intrinsic */
+#if defined(HAVE_ARMV8_CRC32_INTRINSIC)
+#define ARM_CRC32_INTRINSIC_SUPPORTED
+#include <arm_acle.h>
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+/* There are multiple approaches to calculate crc.
+Approach-1: Process 8 bytes, then 4 bytes, then 2 bytes and then 1 byte.
+Approach-2: Process 8 bytes at a time and the remaining tail 1 byte at a time.
+Approach-3: Process 64 bytes at once by issuing 8 crc calls and the remainder
+            using the 8/1 combination.
+
+Based on micro-benchmark testing we found that Approach-2 works best,
+especially for small chunks of variable-length data. */
+
+MY_ATTRIBUTE((target("+crc")))
+inline unsigned long aarch64_crc32_checksum(unsigned long crc32,
+                                            const unsigned char *buf,
+                                            unsigned int len) {
+  uint32_t crc = static_cast<uint32_t>(crc32);
+  crc = ~crc;
+
+  const uint64_t *buf8 = (const uint64_t *)buf;
+  while (len >= sizeof(uint64_t)) {
+    crc = __crc32d(crc, *buf8++);
+    len -= sizeof(uint64_t);
+  }
+
+  const uint8_t *buf1 = (const uint8_t *)buf8;
+  while (len >= sizeof(uint8_t)) {
+    crc = __crc32b(crc, *buf1++);
+    len -= sizeof(uint8_t);
+  }
+
+  return (~crc);
+}
+
+/* Query the auxiliary vector to find out whether the CPU supports crc32. */
+inline bool aarch64_crc32_supported() {
+  return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;
+}
+
+typedef unsigned long (*my_crc32_func_t)(unsigned long crc,
+                                         const unsigned char *ptr,
+                                         unsigned int len);
+
+/* Ideally all 64-bit ARM processors support crc32, but in case some model
+doesn't, detect it at run time via the auxiliary vector and use crc32(). */
+my_crc32_func_t my_crc32 =
+    aarch64_crc32_supported() ? aarch64_crc32_checksum : crc32;
+#endif /* defined(HAVE_ARMV8_CRC32_INTRINSIC) */
+
 /*
   Calculate a long checksum for a memoryblock.
 
@@ -47,5 +100,9 @@
 */
 
 ha_checksum my_checksum(ha_checksum crc, const uchar *pos, size_t length) {
+#ifdef ARM_CRC32_INTRINSIC_SUPPORTED
+  return (ha_checksum)my_crc32((uint)crc, pos, (uint)length);
+#else
   return (ha_checksum)crc32((uint)crc, pos, (uint)length);
+#endif /* ARM_CRC32_INTRINSIC_SUPPORTED */
 }