diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc
index 979713f..c35802d 100644
--- a/storage/innobase/ut/ut0crc32.cc
+++ b/storage/innobase/ut/ut0crc32.cc
@@ -114,11 +114,21 @@ ut_crc32_swap_byteorder(
 		| i >> 56);
 }
 
+/* CRC32 hardware implementation. */
+//#if 0
+#ifdef ENABLE_ARMV8_CRC32
+#define ARM_CRC32_INTRINSIC
+#include <arm_acle.h>
+#include <arm_neon.h>
+#else
+#undef ARM_CRC32_INTRINSIC
+#endif
 
 /* Flag that tells whether the CPU supports CRC32 or not */
 bool	ut_crc32_sse2_enabled = false;
 
+
 #if defined(__GNUC__) && defined(__x86_64__)
 /********************************************************************//**
 Fetches CPU info */
@@ -421,17 +431,238 @@ ut_crc32_byte_by_byte_hw(
 }
 #endif /* defined(__GNUC__) && defined(__x86_64__) */
 
+/*************************For AArch64*********************************************
+ *******************************************************************************
+ */
+
+#ifdef ARM_CRC32_INTRINSIC
+
+/******************************************************
+ *
+ * For optimization based on crc+crypto instructions
+ *
+ ********************************************************/
+#define CRC32C3X8(buffer,ITR) \
+	crc1 = __crc32cd(crc1, *((const uint64_t *)buffer + 42*1 + (ITR)));\
+	crc2 = __crc32cd(crc2, *((const uint64_t *)buffer + 42*2 + (ITR)));\
+	crc0 = __crc32cd(crc0, *((const uint64_t *)buffer + 42*0 + (ITR)));
+
+#define CRC32C7X3X8(buffer,ITR) do {\
+	CRC32C3X8(buffer,(ITR)*7+0) \
+	CRC32C3X8(buffer,(ITR)*7+1) \
+	CRC32C3X8(buffer,(ITR)*7+2) \
+	CRC32C3X8(buffer,(ITR)*7+3) \
+	CRC32C3X8(buffer,(ITR)*7+4) \
+	CRC32C3X8(buffer,(ITR)*7+5) \
+	CRC32C3X8(buffer,(ITR)*7+6) \
+	} while(0)
+
+
+#define PREF4X64L1(buffer,PREF_OFFSET, ITR) \
+	__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
+	__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
+	__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
+	__asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
+
+#define PREF1KL1(buffer,PREF_OFFSET) \
+	PREF4X64L1(buffer,(PREF_OFFSET), 0) \
+	PREF4X64L1(buffer,(PREF_OFFSET), 4) \
+	PREF4X64L1(buffer,(PREF_OFFSET), 8) \
+	PREF4X64L1(buffer,(PREF_OFFSET), 12)
+
+#define PREF4X64L2(buffer,PREF_OFFSET, ITR) \
+	__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
+	__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
+	__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
+	__asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
+
+#define PREF1KL2(buffer,PREF_OFFSET) \
+	PREF4X64L2(buffer,(PREF_OFFSET), 0) \
+	PREF4X64L2(buffer,(PREF_OFFSET), 4) \
+	PREF4X64L2(buffer,(PREF_OFFSET), 8) \
+	PREF4X64L2(buffer,(PREF_OFFSET), 12)
+
+uint32_t
+ut_crc32_byte_by_byte_aarch64(
+	const uint8_t*	buf,
+	uint64_t	len)
+{
+	uint32_t	crc = 0xFFFFFFFFU;
+
+	ut_a(ut_crc32_sse2_enabled);
+
+	while (len > 0) {
+		crc = __crc32cb(crc, *buf++);
+		len--;
+	}
+
+	return(~crc);
+}
+
+uint32_t
+ut_crc32_aarch64(
+	const uint8_t*	buf,
+	uint64_t	len)
+{
+	register uint32_t		crc = 0xFFFFFFFFU;
+	register const uint16_t*	buf2;
+	register const uint32_t*	buf4;
+	register const uint64_t*	buf8;
+
+	ut_a(ut_crc32_sse2_enabled);
+
+#if 0
+	int64_t	length = (int64_t)len;
+	buf8 = (const uint64_t *)(const void *)buf;
+
+	while ((length -= sizeof(uint64_t)) >= 0) {
+		crc = __crc32cd(crc, *buf8++);
+	}
+
+	/* The following is more efficient than the straight loop */
+	buf4 = (const uint32_t *)(const void *)buf8;
+	if (length & sizeof(uint32_t)) {
+		crc = __crc32cw(crc, *buf4++);
+	}
+
+	buf2 = (const uint16_t *)(const void *)buf4;
+	if (length & sizeof(uint16_t)) {
+		crc = __crc32ch(crc, *buf2++);
+	}
+
+	buf = (const uint8_t *)(const void *)buf2;
+	if (length & sizeof(uint8_t))
+		crc = __crc32cb(crc, *buf);
+#endif
+
+#if 0
+	/* Calculate byte-by-byte up to an 8-byte aligned address. After
+	this consume the input 8-bytes at a time. */
+	while (len > 0 && (reinterpret_cast<uintptr_t>(buf) & 7) != 0) {
+		crc = __crc32cb(crc, *buf++);
+		len--;
+	}
+
+	buf8 = (const uint64_t *)(const void *)buf;
+	while (len >= 128) {
+		/* This call is repeated 16 times. 16 * 8 = 128. */
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		crc = __crc32cd(crc, *buf8++);
+		len -= 128;
+	}
+
+	while (len >= 8) {
+		crc = __crc32cd(crc, *buf8++);
+		len -= 8;
+	}
+
+	buf = (const unsigned char *)buf8;
+
+	if (len) do {
+		crc = __crc32cb(crc, *buf++);
+	} while (--len);
+#endif /* if 0 */
+
+#if 1
+	uint32_t	crc0, crc1, crc2;
+	int64_t		length = (int64_t)len;
+	buf8 = (const uint64_t *)(const void *)buf;
+
+	/* Calculate reflected crc with PMULL Instruction */
+	const poly64_t	k1 = 0xe417f38a, k2 = 0x8f158014;
+	uint64_t	t0, t1;
+
+	/* crc done "by 3" for fixed input block size of 1024 bytes */
+	while ((length -= 1024) >= 0) {
+		/* Prefetch data for following block to avoid cache miss */
+		PREF1KL2(buf,1024*3);
+		/* Do first 8 bytes here for better pipelining */
+		crc0 = __crc32cd(crc, *buf8++);
+		crc1 = 0;
+		crc2 = 0;
+
+		/* Process block inline
+		Process crc0 last to avoid dependency with above */
+		CRC32C7X3X8(buf8,0);
+		CRC32C7X3X8(buf8,1);
+		CRC32C7X3X8(buf8,2);
+		CRC32C7X3X8(buf8,3);
+		CRC32C7X3X8(buf8,4);
+		CRC32C7X3X8(buf8,5);
+
+		buf8 += 42*3;
+		/* Prefetch data for following block to avoid cache miss */
+		PREF1KL1((uint8_t *)buf8,1024);
+
+		/* Merge crc0 and crc1 into crc2
+		crc1 multiply by K2
+		crc0 multiply by K1 */
+
+		t1 = (uint64_t)vmull_p64(crc1, k2);
+		t0 = (uint64_t)vmull_p64(crc0, k1);
+		crc = __crc32cd(crc2, *buf8++);
+		crc1 = __crc32cd(0, t1);
+		crc ^= crc1;
+		crc0 = __crc32cd(0, t0);
+		crc ^= crc0;
+	}
+
+	if (!(length += 1024))
+		return (~crc);
+
+	while ((length -= sizeof(uint64_t)) >= 0) {
+		crc = __crc32cd(crc, *buf8++);
+	}
+
+	/* The following is more efficient than the straight loop */
+	buf4 = (const uint32_t *)(const void *)buf8;
+	if (length & sizeof(uint32_t)) {
+		crc = __crc32cw(crc, *buf4++);
+	}
+
+	buf2 = (const uint16_t *)(const void *)buf4;
+	if (length & sizeof(uint16_t)) {
+		crc = __crc32ch(crc, *buf2++);
+	}
+
+	buf = (const uint8_t *)(const void *)buf2;
+	if (length & sizeof(uint8_t))
+		crc = __crc32cb(crc, *buf);
+#endif /* if 1 */
+
+	return(~crc);
+}
+
+#endif /* ARM_CRC32_INTRINSIC */
+
+
 /* CRC32 software implementation. */
 
 /* Precalculated table used to generate the CRC32 if the CPU does
 not have support for it */
 static uint32_t	ut_crc32_slice8_table[8][256];
-static bool	ut_crc32_slice8_table_initialized = false;
+bool	ut_crc32_slice8_table_initialized = false;
 
 /********************************************************************//**
 Initializes the table that is used to generate the CRC32 if the CPU does
 not have support for it. */
-static
+//static
 void
 ut_crc32_slice8_table_init()
 /*========================*/
@@ -563,8 +794,8 @@ ut_crc32_64_legacy_big_endian_sw(
 @return CRC-32C (polynomial 0x11EDC6F41) */
 uint32_t
 ut_crc32_sw(
-	const byte*	buf,
-	ulint		len)
+	const uint8_t*	buf,
+	uint64_t	len)
 {
 	uint32_t	crc = 0xFFFFFFFFU;
 
@@ -727,6 +958,15 @@ ut_crc32_init()
 
 #endif /* defined(__GNUC__) && defined(__x86_64__) */
 
+#ifdef ARM_CRC32_INTRINSIC
+	ut_crc32_sse2_enabled = 0x1;
+	if (ut_crc32_sse2_enabled) {
+		ut_crc32 = ut_crc32_aarch64;
+		ut_crc32_legacy_big_endian = NULL;
+		ut_crc32_byte_by_byte = ut_crc32_byte_by_byte_aarch64;
+	}
+#endif
+
 	if (!ut_crc32_sse2_enabled) {
 		ut_crc32_slice8_table_init();
 		ut_crc32 = ut_crc32_sw;
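
Note (not part of the patch): the hot path above is built on the ACLE CRC32C intrinsics (__crc32cb/__crc32ch/__crc32cw/__crc32cd from <arm_acle.h>) followed by a doubleword/word/halfword/byte tail ladder. Below is a minimal, standalone cross-check sketch of that idea; the file name, test data and main() harness are illustrative only, and unlike the patch it uses memcpy() loads so it stays alignment-safe without the byte-wise alignment prologue. It assumes an AArch64 toolchain with the CRC extension, e.g. gcc -O2 -march=armv8-a+crc crc32c_check.c, and validates the intrinsic ladder against a bit-by-bit software CRC-32C (reflected polynomial 0x82F63B78), which is the kind of check worth running before wiring ut_crc32_aarch64() into ut_crc32_init().

/* crc32c_check.c: compare ACLE CRC32C intrinsics against a bitwise model. */
#include <arm_acle.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bit-by-bit CRC-32C (Castagnoli), reflected polynomial 0x82F63B78. */
static uint32_t
crc32c_sw(const uint8_t *buf, size_t len)
{
	uint32_t crc = 0xFFFFFFFFU;

	while (len--) {
		crc ^= *buf++;
		for (int i = 0; i < 8; i++) {
			crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78U : 0);
		}
	}
	return ~crc;
}

/* Hardware CRC-32C using the same 8/4/2/1-byte ladder as the patch. */
static uint32_t
crc32c_hw(const uint8_t *buf, size_t len)
{
	uint32_t crc = 0xFFFFFFFFU;

	while (len >= 8) {
		uint64_t v;
		memcpy(&v, buf, 8);	/* alignment-safe 64-bit load */
		crc = __crc32cd(crc, v);
		buf += 8;
		len -= 8;
	}
	if (len & 4) {
		uint32_t v;
		memcpy(&v, buf, 4);
		crc = __crc32cw(crc, v);
		buf += 4;
	}
	if (len & 2) {
		uint16_t v;
		memcpy(&v, buf, 2);
		crc = __crc32ch(crc, v);
		buf += 2;
	}
	if (len & 1) {
		crc = __crc32cb(crc, *buf);
	}
	return ~crc;
}

int
main(void)
{
	uint8_t buf[3000];

	for (size_t i = 0; i < sizeof(buf); i++) {
		buf[i] = (uint8_t) (i * 131 + 7);
	}
	/* Exercise a range of lengths, including ones that hit every tail case. */
	for (size_t len = 0; len <= sizeof(buf); len += 61) {
		if (crc32c_sw(buf, len) != crc32c_hw(buf, len)) {
			printf("mismatch at len=%zu\n", len);
			return 1;
		}
	}
	printf("software and hardware CRC32C agree\n");
	return 0;
}

The same harness can be pointed at ut_crc32_aarch64() and ut_crc32_sw() inside the server tree to confirm that the 1024-byte "by 3" folding path and the slice-by-8 software path produce identical checksums.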