From 066dea7538d6f8ec3175f42fc3fcb0b5e3d7e45a Mon Sep 17 00:00:00 2001
From: Yuqi Gu
Date: Fri, 18 Aug 2017 06:39:41 +0000
Subject: [PATCH] Accelerated AES with ARMv8 Crypto Extensions

Jira: ENTWLS-399

Change-Id: Idc6ebc1f4279cd5746fb725c97a90f3c6c14c00a
Signed-off-by: Yuqi Gu
---
 cmake/build_configurations/compiler_options.cmake |  16 ++-
 config.h.cmake                                    |   3 +
 configure.cmake                                   |   6 +
 extra/yassl/taocrypt/include/aes.hpp              |  14 +-
 extra/yassl/taocrypt/src/aes.cpp                  | 168 ++++++++++++++++++++++-
 5 files changed, 201 insertions(+), 6 deletions(-)

diff --git a/cmake/build_configurations/compiler_options.cmake b/cmake/build_configurations/compiler_options.cmake
index f607508..052ea70 100644
--- a/cmake/build_configurations/compiler_options.cmake
+++ b/cmake/build_configurations/compiler_options.cmake
@@ -24,13 +24,17 @@ ENDIF()
 IF(SIZEOF_VOIDP EQUAL 8)
   SET(64BIT 1)
 ENDIF()
- 
+
 # Compiler options
-IF(UNIX) 
+IF(UNIX)
 
   # Default GCC flags
   IF(CMAKE_COMPILER_IS_GNUCC)
-    SET(COMMON_C_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing")
+    IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+      SET(COMMON_C_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing -march=armv8-a+crypto")
+    ELSE()
+      SET(COMMON_C_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing")
+    ENDIF()
     # Disable inline optimizations for valgrind testing to avoid false positives
     IF(WITH_VALGRIND)
       SET(COMMON_C_FLAGS "-fno-inline ${COMMON_C_FLAGS}")
@@ -54,7 +58,11 @@ IF(UNIX)
     SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 ${COMMON_C_FLAGS}")
   ENDIF()
   IF(CMAKE_COMPILER_IS_GNUCXX)
-    SET(COMMON_CXX_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing")
+    IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+      SET(COMMON_CXX_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing -march=armv8-a+crypto")
+    ELSE()
+      SET(COMMON_CXX_FLAGS "-g -fabi-version=2 -fno-omit-frame-pointer -fno-strict-aliasing")
+    ENDIF()
     # GCC 6 has C++14 as default, set it explicitly to the old default.
     EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion
                     OUTPUT_VARIABLE GXX_VERSION)
diff --git a/config.h.cmake b/config.h.cmake
index e0abacc..9704683 100644
--- a/config.h.cmake
+++ b/config.h.cmake
@@ -449,4 +449,7 @@
 /* For default value of --early_plugin_load */
 #cmakedefine DEFAULT_EARLY_PLUGIN_LOAD @DEFAULT_EARLY_PLUGIN_LOAD@
 
+/* For enabling the ARM64v8 crypto extension */
+#cmakedefine ENABLE_ARM64V8_CRYPTO 1
+
 #endif
diff --git a/configure.cmake b/configure.cmake
index 1d98347..c54f3b5 100644
--- a/configure.cmake
+++ b/configure.cmake
@@ -997,3 +997,9 @@ CHECK_TYPE_SIZE("socklen_t" SIZEOF_SOCKLEN_T)
 IF(SIZEOF_SOCKLEN_T)
   SET(HAVE_SOCKLEN_T 1)
 ENDIF()
+
+# Enable the ARM64v8 crypto extension
+IF(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+  MESSAGE(STATUS "ARMv8 CE enabled.")
+  SET(ENABLE_ARM64V8_CRYPTO 1)
+ENDIF()
diff --git a/extra/yassl/taocrypt/include/aes.hpp b/extra/yassl/taocrypt/include/aes.hpp
index bccf6e7..e8a47c1 100644
--- a/extra/yassl/taocrypt/include/aes.hpp
+++ b/extra/yassl/taocrypt/include/aes.hpp
@@ -31,11 +31,19 @@
     #define DO_AES_ASM
 #endif
 
+#ifdef ENABLE_ARM64V8_CRYPTO
+#define ARMV8_CE
+#endif
 
+#ifdef ARMV8_CE
+#include <arm_neon.h>
+/* Size in bytes of the largest expanded key (AES-256: 15 round keys) */
+#define EXPND_R_KEY_MSIZE (60*4)
+//#define ARMV8_CE_DEBUG
+#endif
 
 
 namespace TaoCrypt {
 
-
 enum { AES_BLOCK_SIZE = 16 };
 
@@ -50,6 +58,10 @@ public:
 #ifdef DO_AES_ASM
     void Process(byte*, const byte*, word32);
 #endif
+#ifdef ARMV8_CE
+    void Process(byte*, const byte*, word32);
+    void armv8_aes_blkcrypt(int, uint8x16_t*, const uint8x16_t*);
+#endif
     void SetKey(const byte* key, word32 sz, CipherDir fake = ENCRYPTION);
     void SetIV(const byte* iv) { memcpy(r_, iv, BLOCK_SIZE); }
 private:
diff --git a/extra/yassl/taocrypt/src/aes.cpp b/extra/yassl/taocrypt/src/aes.cpp
index 3fcf80a..ccafb5c 100644
--- a/extra/yassl/taocrypt/src/aes.cpp
+++ b/extra/yassl/taocrypt/src/aes.cpp
@@ -26,9 +26,170 @@
 #include "runtime.hpp"
 #include "aes.hpp"
 
-
+#if defined(ARMV8_CE) && defined(ARMV8_CE_DEBUG)
+#include <stdio.h>   /* printf for the CE debug dumps */
+#endif
+
 namespace TaoCrypt {
 
+#if defined(ARMV8_CE)
+
+#ifdef ARMV8_CE_DEBUG
+static void print_uint8(uint8x16_t data, const char* name)
+{
+    int i;
+    static uint8_t p[16];
+
+    vst1q_u8(p, data);
+
+    printf("%s = ", name);
+    for (i = 0; i < 16; i++) {
+        printf("%02x ", p[i]);
+    }
+    printf("\n");
+}
+#endif
+
+static void armv8_rk_init(word32* rk_from, word32 keylen)
+{
+    word32 i;
+    word32* wd_op = rk_from;
+    word32 wd_to;
+
+    /* The table-based key schedule stores each round-key word big-endian;
+       byte-swap every word so vld1q_u8() sees the bytes in FIPS-197 order. */
+    for (i = 0; i < keylen / 4; i++) {
+        wd_to =  (*(wd_op + i) & 0xff000000) >> 24
+               | (*(wd_op + i) & 0xff0000)   >>  8
+               | (*(wd_op + i) & 0xff00)     <<  8
+               | (*(wd_op + i) & 0xff)       << 24;
+
+        *(wd_op + i) = wd_to;
+    }
+}
+
+void AES::armv8_aes_blkcrypt(int crypt_mode, uint8x16_t* o_buff,
+                             const uint8x16_t* i_buff)
+{
+    word32 i;
+    uint8x16_t input_vec, rk_vec;
+
+    byte* rk_expnd = (byte*)key_;
+    input_vec = *i_buff;
+
+    if (ENCRYPTION == crypt_mode) {
+        for (i = 0; i < rounds_ - 1; i++) {
+            /* Load expanded round key */
+            rk_vec = vld1q_u8(rk_expnd);
+
+            /* AddRoundKey, SubBytes and ShiftRows */
+            input_vec = vaeseq_u8(input_vec, rk_vec);
+
+            /* MixColumns */
+            input_vec = vaesmcq_u8(input_vec);
+
+#ifdef ARMV8_CE_DEBUG
+            printf("E-Round %u ", i + 1);
+            print_uint8(input_vec, "input_vec");
+#endif
+
+            /* Advance to the next expanded round key */
+            rk_expnd += 16;
+        }
+
+        /* Final round, no MixColumns */
+        rk_vec = vld1q_u8(rk_expnd);
+        input_vec = vaeseq_u8(input_vec, rk_vec);
+    }
+    else {
+        /* DECRYPTION */
+        for (i = 0; i < rounds_ - 1; i++) {
+            /* Load expanded round key */
+            rk_vec = vld1q_u8(rk_expnd);
+
+            /* Inverse: AddRoundKey, SubBytes and ShiftRows */
+            input_vec = vaesdq_u8(input_vec, rk_vec);
+
+            /* Inverse MixColumns */
+            input_vec = vaesimcq_u8(input_vec);
+
+#ifdef ARMV8_CE_DEBUG
+            printf("D-Round %u ", i + 1);
+            print_uint8(input_vec, "input_vec");
+#endif
+
+            /* Advance to the next expanded round key */
+            rk_expnd += 16;
+        }
+
+        /* Final round, no inverse MixColumns */
+        rk_vec = vld1q_u8(rk_expnd);
+        input_vec = vaesdq_u8(input_vec, rk_vec);
+    }
+
+    /* Final AddRoundKey step */
+    rk_expnd += 16;
+    rk_vec = vld1q_u8(rk_expnd);
+    input_vec = veorq_u8(input_vec, rk_vec);
+
+#ifdef ARMV8_CE_DEBUG
+    print_uint8(input_vec, "Final vec");
+#endif
+
+    *o_buff = input_vec;
+}
+
+void AES::Process(byte* o_buff, const byte* i_buff, word32 crypt_size)
+{
+    word32 crypt_blocks = crypt_size / BLOCK_SIZE;
+    uint8x16_t in_vec, out_vec, iv_vec, tmp_vec;
+
+    if (mode_ == ECB) {
+        while (crypt_blocks--) {
+            in_vec = vld1q_u8(i_buff);
+            armv8_aes_blkcrypt((int)dir_, &out_vec, &in_vec);
+
+            // Write the result vector back to the output buffer
+            vst1q_u8(o_buff, out_vec);
+
+            o_buff += BLOCK_SIZE;
+            i_buff += BLOCK_SIZE;
+        }
+    } else if (mode_ == CBC) {
+        iv_vec = vld1q_u8((byte*)r_);
+
+        while (crypt_blocks--) {
+            in_vec = vld1q_u8(i_buff);
+
+            if (dir_ == ENCRYPTION) {
+                tmp_vec = veorq_u8(iv_vec, in_vec);
+                armv8_aes_blkcrypt((int)dir_, &out_vec, &tmp_vec);
+                iv_vec = out_vec;
+
+                // Write the result back to the output buffer
+                vst1q_u8(o_buff, out_vec);
+
+                o_buff += BLOCK_SIZE;
+                i_buff += BLOCK_SIZE;
+            } else {
+                armv8_aes_blkcrypt((int)dir_, &tmp_vec, &in_vec);
+                out_vec = veorq_u8(tmp_vec, iv_vec);
+
+                // Write the result back to the output buffer
+                vst1q_u8(o_buff, out_vec);
+
+                iv_vec = in_vec;
+                o_buff += BLOCK_SIZE;
+                i_buff += BLOCK_SIZE;
+            }
+        }
+
+        // Persist the running IV so consecutive Process() calls chain correctly
+        vst1q_u8(r_, iv_vec);
+    }
+}
+
+#endif /* ARMV8_CE */
 
 
 #if defined(DO_AES_ASM)
@@ -212,6 +373,11 @@ void AES::SetKey(const byte* userKey, word32 keylen, CipherDir /*dummy*/)
                 Td3[Te1[GETBYTE(rk[3], 0)] & 0xff];
         }
     }
+
+#if defined(ARMV8_CE)
+    /* Re-order the expanded round-key bytes for the vector loads above */
+    armv8_rk_init(key_, EXPND_R_KEY_MSIZE);
+#endif
 }
 
 
-- 
2.7.4
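
Notes (follow-up to the patch above, not part of it):

1) The CE path is selected purely at build time: CMAKE_SYSTEM_PROCESSOR flips on -march=armv8-a+crypto, so the resulting binary assumes the Crypto Extensions on every aarch64 machine it runs on, even though CE is an optional ARMv8 feature. A minimal sketch of a runtime probe that could guard a dispatch between the CE path and the existing table-based code (Linux-specific, glibc getauxval(); the function name is illustrative):

    #include <sys/auxv.h>   /* getauxval(), AT_HWCAP */
    #include <asm/hwcap.h>  /* HWCAP_AES on aarch64 Linux */

    /* Nonzero when the kernel advertises the AES instructions */
    static bool armv8_ce_aes_available()
    {
        return (getauxval(AT_HWCAP) & HWCAP_AES) != 0;
    }

Without such a check, this build would die with SIGILL on the first vaeseq_u8 when run on a CE-less ARMv8 core.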
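
2) The intrinsics in armv8_aes_blkcrypt() do not map one-to-one onto the textbook round: vaeseq_u8() fuses AddRoundKey, SubBytes and ShiftRows, and vaesmcq_u8() supplies MixColumns, so an Nr-round AES consumes its Nr+1 round keys as Nr-1 fused rounds, one round without MixColumns, and a final XOR. A condensed, self-contained restatement of the patch's encrypt loop (round count and key pointer made parameters; keys already in byte order):

    #include <arm_neon.h>
    #include <stdint.h>

    /* rk points at (nr + 1) round keys of 16 bytes each; nr is 10/12/14 */
    static uint8x16_t aes_ce_encrypt_block(uint8x16_t block, const uint8_t* rk, int nr)
    {
        for (int i = 0; i < nr - 1; i++, rk += 16)      /* nr-1 full rounds */
            block = vaesmcq_u8(vaeseq_u8(block, vld1q_u8(rk)));
        block = vaeseq_u8(block, vld1q_u8(rk));         /* round nr: no MixColumns */
        return veorq_u8(block, vld1q_u8(rk + 16));      /* final AddRoundKey */
    }

Decryption is the mirror image with vaesdq_u8()/vaesimcq_u8(), which is why it relies on the equivalent-inverse-cipher key schedule that the existing Td-table SetKey() code already produces.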
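
3) Since the new Process() keeps the existing TaoCrypt signature, callers are untouched and the acceleration arrives through the usual mode wrappers. A sketch of the call sequence, assuming the stock TaoCrypt AES_CBC_Encryption typedef and a sz that is a multiple of the 16-byte block:

    #include "aes.hpp"   /* TaoCrypt */

    using namespace TaoCrypt;

    void cbc_encrypt_buffer(const byte key[16], const byte iv[16],
                            const byte* plain, byte* cipher, word32 sz)
    {
        AES_CBC_Encryption enc;
        enc.SetKey(key, 16, iv);          /* AES-128; SetKey() also runs armv8_rk_init() */
        enc.Process(cipher, plain, sz);   /* dispatches to the ARMV8_CE Process() */
    }

Note that armv8_rk_init() always swaps the full 60-word buffer named by EXPND_R_KEY_MSIZE; key_ is sized for the AES-256 schedule, so for shorter keys this touches words past the live schedule, which is harmless but slightly wasteful.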