From dc942becd90e703b4ec9ebf199e7c041d767019c Mon Sep 17 00:00:00 2001
From: guyuqi <yuqi.gu@arm.com>
Date: Thu, 6 Apr 2017 05:04:46 +0000
Subject: [PATCH] Bug #85819 Add AArch64 optimized crc32c implementation

---
 cmake/build_configurations/compiler_options.cmake | 12 +++-
 config.h.cmake                                    |  3 +
 configure.cmake                                   |  6 ++
 storage/innobase/ut/ut0crc32.cc                   | 82 +++++++++++++++++++++++
 4 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/cmake/build_configurations/compiler_options.cmake b/cmake/build_configurations/compiler_options.cmake
index c112418..4d72f44 100644
--- a/cmake/build_configurations/compiler_options.cmake
+++ b/cmake/build_configurations/compiler_options.cmake
@@ -30,7 +30,11 @@ IF(UNIX)
 
   # Default GCC flags
   IF(CMAKE_COMPILER_IS_GNUCC)
     SET(COMMON_C_FLAGS               "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing")
+    # AArch64: append the -march level that enables the ARMv8 CRC32 (and crypto) instructions
+    IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+      SET(COMMON_C_FLAGS             "${COMMON_C_FLAGS} -march=armv8-a+crypto+crc")
+    ENDIF()
     # Disable inline optimizations for valgrind testing to avoid false positives
     IF(WITH_VALGRIND)
       SET(COMMON_C_FLAGS             "-fno-inline ${COMMON_C_FLAGS}")
@@ -54,7 +58,11 @@ IF(UNIX)
     SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 ${COMMON_C_FLAGS}")
   ENDIF()
   IF(CMAKE_COMPILER_IS_GNUCXX)
     SET(COMMON_CXX_FLAGS               "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing")
+    # AArch64: append the -march level that enables the ARMv8 CRC32 (and crypto) instructions
+    IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+      SET(COMMON_CXX_FLAGS             "${COMMON_CXX_FLAGS} -march=armv8-a+crypto+crc")
+    ENDIF()
     # GCC 6 has C++14 as default, set it explicitly to the old default.
     EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
                     OUTPUT_VARIABLE GXX_VERSION)
diff --git a/config.h.cmake b/config.h.cmake
index c89493a..d87ef82 100644
--- a/config.h.cmake
+++ b/config.h.cmake
@@ -448,4 +448,7 @@
 /* For default value of --early_plugin_load */
 #cmakedefine DEFAULT_EARLY_PLUGIN_LOAD @DEFAULT_EARLY_PLUGIN_LOAD@
 
+/* Set by configure.cmake on AArch64: build the ARMv8 CRC32 instruction code path in ut0crc32.cc */
+#cmakedefine ENABLE_ARMV8_CRC32
+
 #endif
diff --git a/configure.cmake b/configure.cmake
index cb8aa67..e239187 100644
--- a/configure.cmake
+++ b/configure.cmake
@@ -929,3 +929,9 @@ CHECK_TYPE_SIZE("socklen_t" SIZEOF_SOCKLEN_T)
 IF(SIZEOF_SOCKLEN_T)
   SET(HAVE_SOCKLEN_T 1)
 ENDIF()
+
+# Enable the ARMv8 crc32c code path on AArch64. NOTE(review): -march=...+crc is only added for GCC in compiler_options.cmake; confirm other compilers.
+IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+  MESSAGE(STATUS "ARMv8 crc32 enabled.")
+  SET(ENABLE_ARMV8_CRC32  1)
+ENDIF()
diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc
index 979713f..b94408d 100644
--- a/storage/innobase/ut/ut0crc32.cc
+++ b/storage/innobase/ut/ut0crc32.cc
@@ -114,11 +114,27 @@ ut_crc32_swap_byteorder(
 	       | i >> 56);
 }
 
+
 /* CRC32 hardware implementation. */
+#ifdef ENABLE_ARMV8_CRC32
+#define ARM_CRC32_INTRINSIC
+#include <arm_acle.h>
+#include <arm_neon.h>
+#if defined(__linux__)
+/* getauxval() and AT_HWCAP, used to detect the CRC32 extension at runtime */
+#include <sys/auxv.h>
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1 << 7)
+#endif
+#endif /* __linux__ */
+#else
+#undef ARM_CRC32_INTRINSIC
+#endif
 
 /* Flag that tells whether the CPU supports CRC32 or not */
 bool	ut_crc32_sse2_enabled = false;
 
+
 #if defined(__GNUC__) && defined(__x86_64__)
 /********************************************************************//**
 Fetches CPU info */
@@ -421,6 +430,70 @@ ut_crc32_byte_by_byte_hw(
 }
 #endif /* defined(__GNUC__) && defined(__x86_64__) */
 
+/*************
+ * AArch64 (ARMv8): CRC-32C computed with the ACLE __crc32c* intrinsics
+ */
+
+#ifdef ARM_CRC32_INTRINSIC
+/** CRC-32C of buf[0..len), fed to __crc32cb one byte at a time. @return the inverted accumulator */
+uint32_t
+ut_crc32_byte_by_byte_aarch64(
+	const byte*	buf,
+	ulint		len)
+{
+	ut_a(ut_crc32_sse2_enabled);
+
+	uint32_t		acc = 0xFFFFFFFFU;
+	const byte* const	end = buf + len;
+
+	for (const byte* p = buf; p != end; ++p) {
+		acc = __crc32cb(acc, *p);
+	}
+	return(~acc);
+}
+
+
+/** CRC-32C of buf[0..len) using the ARMv8 CRC32 instructions: 8-byte words
+first, then at most one 4-, 2- and 1-byte tail. Words are fetched with
+__builtin_memcpy because 'buf' may be misaligned for a casted-pointer load.
+@return bit-inverted CRC accumulator (seeded with 0xFFFFFFFF) */
+uint32_t
+ut_crc32_aarch64(
+	const byte*	buf,
+	ulint		len)
+{
+	uint32_t	crc = 0xFFFFFFFFU;
+
+	ut_a(ut_crc32_sse2_enabled);
+
+	for (; len >= sizeof(uint64_t); len -= sizeof(uint64_t)) {
+		uint64_t	w8;
+		__builtin_memcpy(&w8, buf, sizeof(w8));
+		crc = __crc32cd(crc, w8);
+		buf += sizeof(w8);
+	}
+	/* 0..7 bytes remain; each set bit of len names a piece still to add */
+	if (len & sizeof(uint32_t)) {
+		uint32_t	w4;
+		__builtin_memcpy(&w4, buf, sizeof(w4));
+		crc = __crc32cw(crc, w4);
+		buf += sizeof(w4);
+	}
+	if (len & sizeof(uint16_t)) {
+		uint16_t	w2;
+		__builtin_memcpy(&w2, buf, sizeof(w2));
+		crc = __crc32ch(crc, w2);
+		buf += sizeof(w2);
+	}
+	if (len & sizeof(uint8_t)) {
+		crc = __crc32cb(crc, *buf);
+	}
+	return(~crc);
+}
+
+#endif /*ARM_CRC32_INTRINSIC*/
+
+
 /* CRC32 software implementation. */
 
 /* Precalculated table used to generate the CRC32 if the CPU does not
@@ -727,6 +800,24 @@ ut_crc32_init()
 
 #endif /* defined(__GNUC__) && defined(__x86_64__) */
 
+#ifdef ARM_CRC32_INTRINSIC
+	/* The CRC32 instructions are an optional extension in ARMv8.0, so
+	ask the kernel whether this CPU implements them; where that cannot
+	be queried, keep trusting the build-time configuration. */
+#if defined(__linux__) && defined(AT_HWCAP) && defined(HWCAP_CRC32)
+	ut_crc32_sse2_enabled = (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;
+#else
+	ut_crc32_sse2_enabled = true;
+#endif
+	if (ut_crc32_sse2_enabled) {
+		ut_crc32 = ut_crc32_aarch64;
+		/* NOTE(review): there is no AArch64 variant of the legacy
+		big-endian checksum, so it stays NULL - confirm no caller uses it */
+		ut_crc32_legacy_big_endian = NULL;
+		ut_crc32_byte_by_byte = ut_crc32_byte_by_byte_aarch64;
+	}
+#endif
+
 	if (!ut_crc32_sse2_enabled) {
 		ut_crc32_slice8_table_init();
 		ut_crc32 = ut_crc32_sw;
-- 
2.7.4