From dc942becd90e703b4ec9ebf199e7c041d767019c Mon Sep 17 00:00:00 2001 From: guyuqi Date: Thu, 6 Apr 2017 05:04:46 +0000 Subject: [PATCH] Bug #85819 Add AArch64 optimized crc32c implementation --- cmake/build_configurations/compiler_options.cmake | 12 +++- config.h.cmake | 3 + configure.cmake | 6 ++ storage/innobase/ut/ut0crc32.cc | 82 +++++++++++++++++++++++ 4 files changed, 101 insertions(+), 2 deletions(-) diff --git a/cmake/build_configurations/compiler_options.cmake b/cmake/build_configurations/compiler_options.cmake index c112418..4d72f44 100644 --- a/cmake/build_configurations/compiler_options.cmake +++ b/cmake/build_configurations/compiler_options.cmake @@ -30,7 +30,11 @@ IF(UNIX) # Default GCC flags IF(CMAKE_COMPILER_IS_GNUCC) - SET(COMMON_C_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing") + IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") + SET(COMMON_C_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing -march=armv8-a+crypto+crc") + ELSE() + SET(COMMON_C_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing") + ENDIF() # Disable inline optimizations for valgrind testing to avoid false positives IF(WITH_VALGRIND) SET(COMMON_C_FLAGS "-fno-inline ${COMMON_C_FLAGS}") @@ -54,7 +58,11 @@ IF(UNIX) SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 ${COMMON_C_FLAGS}") ENDIF() IF(CMAKE_COMPILER_IS_GNUCXX) - SET(COMMON_CXX_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing") + IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") + SET(COMMON_CXX_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing -march=armv8-a+crypto+crc") + ELSE() + SET(COMMON_CXX_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing") + ENDIF() # GCC 6 has C++14 as default, set it explicitly to the old default. EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GXX_VERSION) diff --git a/config.h.cmake b/config.h.cmake index c89493a..d87ef82 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -448,4 +448,7 @@ /* For default value of --early_plugin_load */ #cmakedefine DEFAULT_EARLY_PLUGIN_LOAD @DEFAULT_EARLY_PLUGIN_LOAD@ +/* Support ARMv8 CRC instructions */ +#cmakedefine ENABLE_ARMV8_CRC32 + #endif diff --git a/configure.cmake b/configure.cmake index cb8aa67..e239187 100644 --- a/configure.cmake +++ b/configure.cmake @@ -929,3 +929,9 @@ CHECK_TYPE_SIZE("socklen_t" SIZEOF_SOCKLEN_T) IF(SIZEOF_SOCKLEN_T) SET(HAVE_SOCKLEN_T 1) ENDIF() + +# Enable crc32 on AArch64 Platform +IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") + MESSAGE(STATUS "ARMv8 crc32 enabled.") + SET(ENABLE_ARMV8_CRC32 1) +ENDIF() diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc index 979713f..b94408d 100644 --- a/storage/innobase/ut/ut0crc32.cc +++ b/storage/innobase/ut/ut0crc32.cc @@ -114,11 +114,20 @@ ut_crc32_swap_byteorder( | i >> 56); } + /* CRC32 hardware implementation. */ +#ifdef ENABLE_ARMV8_CRC32 +#define ARM_CRC32_INTRINSIC +#include +#include +#else +#undef ARM_CRC32_INTRINSIC +#endif /* Flag that tells whether the CPU supports CRC32 or not */ bool ut_crc32_sse2_enabled = false; + #if defined(__GNUC__) && defined(__x86_64__) /********************************************************************//** Fetches CPU info */ @@ -421,6 +430,70 @@ ut_crc32_byte_by_byte_hw( } #endif /* defined(__GNUC__) && defined(__x86_64__) */ +/************* + * For AArch64 + */ + +#ifdef ARM_CRC32_INTRINSIC +uint32_t +ut_crc32_byte_by_byte_aarch64( + const byte* buf, + ulint len) +{ + uint32_t crc = 0xFFFFFFFFU; + + ut_a(ut_crc32_sse2_enabled); + + while (len > 0) { + crc = __crc32cb(crc, *buf++); + len--; + } + + return(~crc); +} + + +uint32_t +ut_crc32_aarch64( + const byte* buf, + ulint len) +{ + register uint32_t crc = 0xFFFFFFFFU; + register const uint16_t *buf2; + register const uint32_t *buf4; + register const uint64_t *buf8; + + ut_a(ut_crc32_sse2_enabled); + + int64_t length = (int64_t)len; + buf8 = (const uint64_t *)(const void *)buf; + while ((length -= sizeof(uint64_t)) >= 0) { + crc = __crc32cd(crc, *buf8++); + } + + /* The following is more efficient than the straight loop */ + buf4 = (const uint32_t *)(const void *)buf8; + if (length & sizeof(uint32_t)) { + crc = __crc32cw(crc, *buf4++); + length -= 4; + } + + buf2 = (const uint16_t *)(const void *)buf4; + if (length & sizeof(uint16_t)) { + crc = __crc32ch(crc, *buf2++); + length -= 2; + } + + buf = (const uint8_t *)(const void *)buf2; + if (length & sizeof(uint8_t)) + crc = __crc32cb(crc, *buf); + + return(~crc); +} + +#endif /*ARM_CRC32_INTRINSIC*/ + + /* CRC32 software implementation. */ /* Precalculated table used to generate the CRC32 if the CPU does not @@ -727,6 +800,15 @@ ut_crc32_init() #endif /* defined(__GNUC__) && defined(__x86_64__) */ +#ifdef ARM_CRC32_INTRINSIC + ut_crc32_sse2_enabled = 0x1; + if (ut_crc32_sse2_enabled) { + ut_crc32 = ut_crc32_aarch64; + ut_crc32_legacy_big_endian = NULL; + ut_crc32_byte_by_byte = ut_crc32_byte_by_byte_aarch64; + } +#endif + if (!ut_crc32_sse2_enabled) { ut_crc32_slice8_table_init(); ut_crc32 = ut_crc32_sw; -- 2.7.4