Merge tag 'aes-gcm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull AES-GCM optimizations from Eric Biggers:
"More optimizations and cleanups for the x86_64 AES-GCM code:
- Add a VAES+AVX2 optimized implementation of AES-GCM. This is very
helpful on CPUs that have VAES but not AVX512, such as AMD Zen 3.
- Make the VAES+AVX512 optimized implementation of AES-GCM handle
large amounts of associated data efficiently.
- Remove the "avx10_256" implementation of AES-GCM. It's superseded
by the VAES+AVX2 optimized implementation.
- Rename the "avx10_512" implementation to "avx512"
Overall, this fills in a gap where AES-GCM wasn't fully optimized on
some recent CPUs. It also drops code that won't be as useful as
initially expected due to AVX10/256 being dropped from the AVX10 spec"
* tag 'aes-gcm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
crypto: x86/aes-gcm-vaes-avx2 - initialize full %rax return register
crypto: x86/aes-gcm - optimize long AAD processing with AVX512
crypto: x86/aes-gcm - optimize AVX512 precomputation of H^2 from H^1
crypto: x86/aes-gcm - revise some comments in AVX512 code
crypto: x86/aes-gcm - reorder AVX512 precompute and aad_update functions
crypto: x86/aes-gcm - clean up AVX512 code to assume 512-bit vectors
crypto: x86/aes-gcm - rename avx10 and avx10_512 to avx512
crypto: x86/aes-gcm - remove VAES+AVX10/256 optimized code
crypto: x86/aes-gcm - add VAES+AVX2 optimized code
arch/x86/crypto/Makefile
@@ -46,8 +46,9 @@ obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
 aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \
                                aes-gcm-aesni-x86_64.o \
-                               aes-xts-avx-x86_64.o \
-                               aes-gcm-avx10-x86_64.o
+                               aes-gcm-vaes-avx2.o \
+                               aes-gcm-vaes-avx512.o \
+                               aes-xts-avx-x86_64.o
 
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
arch/x86/crypto/aes-gcm-aesni-x86_64.S
@@ -61,15 +61,15 @@
 // for the *_aesni functions or AVX for the *_aesni_avx ones. (But it seems
 // there are no CPUs that support AES-NI without also PCLMULQDQ and SSE4.1.)
 //
-// The design generally follows that of aes-gcm-avx10-x86_64.S, and that file is
+// The design generally follows that of aes-gcm-vaes-avx512.S, and that file is
 // more thoroughly commented. This file has the following notable changes:
 //
 // - The vector length is fixed at 128-bit, i.e. xmm registers. This means
 //   there is only one AES block (and GHASH block) per register.
 //
-// - Without AVX512 / AVX10, only 16 SIMD registers are available instead of
-//   32. We work around this by being much more careful about using
-//   registers, relying heavily on loads to load values as they are needed.
+// - Without AVX512, only 16 SIMD registers are available instead of 32. We
+//   work around this by being much more careful about using registers,
+//   relying heavily on loads to load values as they are needed.
 //
 // - Masking is not available either. We work around this by implementing
 //   partial block loads and stores using overlapping scalar loads and stores
@@ -90,8 +90,8 @@
 // multiplication instead of schoolbook multiplication. This saves one
 // pclmulqdq instruction per block, at the cost of one 64-bit load, one
 // pshufd, and 0.25 pxors per block. (This is without the three-argument
-// XOR support that would be provided by AVX512 / AVX10, which would be
-// more beneficial to schoolbook than Karatsuba.)
+// XOR support that would be provided by AVX512, which would be more
+// beneficial to schoolbook than Karatsuba.)
 //
 // As a rough approximation, we can assume that Karatsuba multiplication is
 // faster than schoolbook multiplication in this context if one pshufd and
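For context on the Karatsuba trade-off described in the hunk above: the following stand-alone C sketch (not part of this patch; plain PCLMUL intrinsics with hypothetical names, and the GHASH bit-reflection and reduction steps omitted) shows how one 128x128-bit carry-less product can be formed with three pclmulqdq instructions instead of four, and why precomputing the XOR of the two 64-bit halves of each hash-key power (the h_powers_xored array introduced later in this diff) moves that work out of the per-block path.

#include <immintrin.h>

/*
 * Illustrative sketch only: Karatsuba-style 128x128 -> 256-bit carry-less
 * multiply.  'h_xored' stands in for a precomputed h_powers_xored[] entry
 * (its low 64 bits hold h0 ^ h1); no GHASH reduction is shown.
 */
static inline void clmul_karatsuba(__m128i a, __m128i h, __m128i h_xored,
                                   __m128i *lo, __m128i *hi)
{
        __m128i p_lo  = _mm_clmulepi64_si128(a, h, 0x00);   /* a0 * h0 */
        __m128i p_hi  = _mm_clmulepi64_si128(a, h, 0x11);   /* a1 * h1 */
        /* the per-block pshufd + pxor: put a0 ^ a1 in the low half of a_x */
        __m128i a_x   = _mm_xor_si128(a, _mm_shuffle_epi32(a, 0x4e));
        /* (a0 ^ a1) * (h0 ^ h1); the h side is precomputed in the key */
        __m128i p_mid = _mm_clmulepi64_si128(a_x, h_xored, 0x00);

        p_mid = _mm_xor_si128(p_mid, _mm_xor_si128(p_lo, p_hi));
        *lo = _mm_xor_si128(p_lo, _mm_slli_si128(p_mid, 8)); /* low 128 bits */
        *hi = _mm_xor_si128(p_hi, _mm_srli_si128(p_mid, 8)); /* high 128 bits */
}

Schoolbook multiplication would instead need two extra pclmulqdq operations (a0*h1 and a1*h0), which is why the comment above weighs one saved pclmulqdq per block against one pshufd, one 64-bit load, and a fraction of a pxor.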
arch/x86/crypto/aes-gcm-vaes-avx2.S (new file, 1146 lines; diff suppressed because it is too large)
arch/x86/crypto/aesni-intel_glue.c
@@ -874,8 +874,38 @@ struct aes_gcm_key_aesni {
 #define AES_GCM_KEY_AESNI_SIZE \
         (sizeof(struct aes_gcm_key_aesni) + (15 & ~(CRYPTO_MINALIGN - 1)))
 
-/* Key struct used by the VAES + AVX10 implementations of AES-GCM */
-struct aes_gcm_key_avx10 {
+/* Key struct used by the VAES + AVX2 implementation of AES-GCM */
+struct aes_gcm_key_vaes_avx2 {
+        /*
+         * Common part of the key. The assembly code prefers 16-byte alignment
+         * for the round keys; we get this by them being located at the start of
+         * the struct and the whole struct being 32-byte aligned.
+         */
+        struct aes_gcm_key base;
+
+        /*
+         * Powers of the hash key H^8 through H^1. These are 128-bit values.
+         * They all have an extra factor of x^-1 and are byte-reversed.
+         * The assembly code prefers 32-byte alignment for this.
+         */
+        u64 h_powers[8][2] __aligned(32);
+
+        /*
+         * Each entry in this array contains the two halves of an entry of
+         * h_powers XOR'd together, in the following order:
+         * H^8,H^6,H^7,H^5,H^4,H^2,H^3,H^1 i.e. indices 0,2,1,3,4,6,5,7.
+         * This is used for Karatsuba multiplication.
+         */
+        u64 h_powers_xored[8];
+};
+
+#define AES_GCM_KEY_VAES_AVX2(key) \
+        container_of((key), struct aes_gcm_key_vaes_avx2, base)
+#define AES_GCM_KEY_VAES_AVX2_SIZE \
+        (sizeof(struct aes_gcm_key_vaes_avx2) + (31 & ~(CRYPTO_MINALIGN - 1)))
+
+/* Key struct used by the VAES + AVX512 implementation of AES-GCM */
+struct aes_gcm_key_vaes_avx512 {
         /*
          * Common part of the key. The assembly code prefers 16-byte alignment
          * for the round keys; we get this by them being located at the start of
@@ -895,10 +925,10 @@ struct aes_gcm_key_avx10 {
         /* Three padding blocks required by the assembly code */
         u64 padding[3][2];
 };
-#define AES_GCM_KEY_AVX10(key) \
-        container_of((key), struct aes_gcm_key_avx10, base)
-#define AES_GCM_KEY_AVX10_SIZE \
-        (sizeof(struct aes_gcm_key_avx10) + (63 & ~(CRYPTO_MINALIGN - 1)))
+#define AES_GCM_KEY_VAES_AVX512(key) \
+        container_of((key), struct aes_gcm_key_vaes_avx512, base)
+#define AES_GCM_KEY_VAES_AVX512_SIZE \
+        (sizeof(struct aes_gcm_key_vaes_avx512) + (63 & ~(CRYPTO_MINALIGN - 1)))
 
 /*
  * These flags are passed to the AES-GCM helper functions to specify the
@@ -910,14 +940,16 @@ struct aes_gcm_key_avx10 {
 #define FLAG_RFC4106    BIT(0)
 #define FLAG_ENC        BIT(1)
 #define FLAG_AVX        BIT(2)
-#define FLAG_AVX10_256  BIT(3)
-#define FLAG_AVX10_512  BIT(4)
+#define FLAG_VAES_AVX2  BIT(3)
+#define FLAG_VAES_AVX512 BIT(4)
 
 static inline struct aes_gcm_key *
 aes_gcm_key_get(struct crypto_aead *tfm, int flags)
 {
-        if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
+        if (flags & FLAG_VAES_AVX512)
                 return PTR_ALIGN(crypto_aead_ctx(tfm), 64);
+        else if (flags & FLAG_VAES_AVX2)
+                return PTR_ALIGN(crypto_aead_ctx(tfm), 32);
         else
                 return PTR_ALIGN(crypto_aead_ctx(tfm), 16);
 }
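A side note on the alignment scheme visible in aes_gcm_key_get() above: each *_SIZE macro reserves just enough slack beyond the allocator's minimum alignment that the context pointer can be rounded up to the alignment its implementation wants (64 bytes for VAES+AVX512, 32 for VAES+AVX2, 16 otherwise). A minimal user-space analogue, with stand-in names rather than the kernel's CRYPTO_MINALIGN/PTR_ALIGN (values and names here are illustrative, not from the patch):

#include <stdint.h>
#include <stddef.h>

#define MINALIGN 8   /* stand-in for CRYPTO_MINALIGN */

/* Round a pointer up to 'a' (a power of two), like the kernel's PTR_ALIGN() */
static void *ptr_align(void *p, uintptr_t a)
{
        return (void *)(((uintptr_t)p + a - 1) & ~(a - 1));
}

/*
 * Extra bytes to request so that a 'want_align'-aligned object always fits
 * inside a buffer that is itself only MINALIGN-aligned.  For want_align == 32
 * this is 31 & ~(MINALIGN - 1), mirroring AES_GCM_KEY_VAES_AVX2_SIZE above.
 */
static size_t align_slack(size_t want_align)
{
        return (want_align - 1) & ~(size_t)(MINALIGN - 1);
}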
@@ -927,26 +959,16 @@ aes_gcm_precompute_aesni(struct aes_gcm_key_aesni *key);
 asmlinkage void
 aes_gcm_precompute_aesni_avx(struct aes_gcm_key_aesni *key);
 asmlinkage void
-aes_gcm_precompute_vaes_avx10_256(struct aes_gcm_key_avx10 *key);
+aes_gcm_precompute_vaes_avx2(struct aes_gcm_key_vaes_avx2 *key);
 asmlinkage void
-aes_gcm_precompute_vaes_avx10_512(struct aes_gcm_key_avx10 *key);
+aes_gcm_precompute_vaes_avx512(struct aes_gcm_key_vaes_avx512 *key);
 
 static void aes_gcm_precompute(struct aes_gcm_key *key, int flags)
 {
-        /*
-         * To make things a bit easier on the assembly side, the AVX10
-         * implementations use the same key format. Therefore, a single
-         * function using 256-bit vectors would suffice here. However, it's
-         * straightforward to provide a 512-bit one because of how the assembly
-         * code is structured, and it works nicely because the total size of the
-         * key powers is a multiple of 512 bits. So we take advantage of that.
-         *
-         * A similar situation applies to the AES-NI implementations.
-         */
-        if (flags & FLAG_AVX10_512)
-                aes_gcm_precompute_vaes_avx10_512(AES_GCM_KEY_AVX10(key));
-        else if (flags & FLAG_AVX10_256)
-                aes_gcm_precompute_vaes_avx10_256(AES_GCM_KEY_AVX10(key));
+        if (flags & FLAG_VAES_AVX512)
+                aes_gcm_precompute_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key));
+        else if (flags & FLAG_VAES_AVX2)
+                aes_gcm_precompute_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key));
         else if (flags & FLAG_AVX)
                 aes_gcm_precompute_aesni_avx(AES_GCM_KEY_AESNI(key));
         else
@@ -960,15 +982,21 @@ asmlinkage void
 aes_gcm_aad_update_aesni_avx(const struct aes_gcm_key_aesni *key,
                              u8 ghash_acc[16], const u8 *aad, int aadlen);
 asmlinkage void
-aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-                              u8 ghash_acc[16], const u8 *aad, int aadlen);
+aes_gcm_aad_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
+                             u8 ghash_acc[16], const u8 *aad, int aadlen);
+asmlinkage void
+aes_gcm_aad_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+                               u8 ghash_acc[16], const u8 *aad, int aadlen);
 
 static void aes_gcm_aad_update(const struct aes_gcm_key *key, u8 ghash_acc[16],
                                const u8 *aad, int aadlen, int flags)
 {
-        if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
-                aes_gcm_aad_update_vaes_avx10(AES_GCM_KEY_AVX10(key), ghash_acc,
-                                              aad, aadlen);
+        if (flags & FLAG_VAES_AVX512)
+                aes_gcm_aad_update_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+                                               ghash_acc, aad, aadlen);
+        else if (flags & FLAG_VAES_AVX2)
+                aes_gcm_aad_update_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
+                                             ghash_acc, aad, aadlen);
         else if (flags & FLAG_AVX)
                 aes_gcm_aad_update_aesni_avx(AES_GCM_KEY_AESNI(key), ghash_acc,
                                              aad, aadlen);
@@ -986,13 +1014,13 @@ aes_gcm_enc_update_aesni_avx(const struct aes_gcm_key_aesni *key,
                              const u32 le_ctr[4], u8 ghash_acc[16],
                              const u8 *src, u8 *dst, int datalen);
 asmlinkage void
-aes_gcm_enc_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
-                                  const u32 le_ctr[4], u8 ghash_acc[16],
-                                  const u8 *src, u8 *dst, int datalen);
+aes_gcm_enc_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
+                             const u32 le_ctr[4], u8 ghash_acc[16],
+                             const u8 *src, u8 *dst, int datalen);
 asmlinkage void
-aes_gcm_enc_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
-                                  const u32 le_ctr[4], u8 ghash_acc[16],
-                                  const u8 *src, u8 *dst, int datalen);
+aes_gcm_enc_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+                               const u32 le_ctr[4], u8 ghash_acc[16],
+                               const u8 *src, u8 *dst, int datalen);
 
 asmlinkage void
 aes_gcm_dec_update_aesni(const struct aes_gcm_key_aesni *key,
@@ -1003,13 +1031,13 @@ aes_gcm_dec_update_aesni_avx(const struct aes_gcm_key_aesni *key,
                              const u32 le_ctr[4], u8 ghash_acc[16],
                              const u8 *src, u8 *dst, int datalen);
 asmlinkage void
-aes_gcm_dec_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key,
-                                  const u32 le_ctr[4], u8 ghash_acc[16],
-                                  const u8 *src, u8 *dst, int datalen);
+aes_gcm_dec_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
+                             const u32 le_ctr[4], u8 ghash_acc[16],
+                             const u8 *src, u8 *dst, int datalen);
 asmlinkage void
-aes_gcm_dec_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
-                                  const u32 le_ctr[4], u8 ghash_acc[16],
-                                  const u8 *src, u8 *dst, int datalen);
+aes_gcm_dec_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+                               const u32 le_ctr[4], u8 ghash_acc[16],
+                               const u8 *src, u8 *dst, int datalen);
 
 /* __always_inline to optimize out the branches based on @flags */
 static __always_inline void
@@ -1018,14 +1046,14 @@ aes_gcm_update(const struct aes_gcm_key *key,
                const u8 *src, u8 *dst, int datalen, int flags)
 {
         if (flags & FLAG_ENC) {
-                if (flags & FLAG_AVX10_512)
-                        aes_gcm_enc_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
-                                                          le_ctr, ghash_acc,
-                                                          src, dst, datalen);
-                else if (flags & FLAG_AVX10_256)
-                        aes_gcm_enc_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
-                                                          le_ctr, ghash_acc,
-                                                          src, dst, datalen);
+                if (flags & FLAG_VAES_AVX512)
+                        aes_gcm_enc_update_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+                                                       le_ctr, ghash_acc,
+                                                       src, dst, datalen);
+                else if (flags & FLAG_VAES_AVX2)
+                        aes_gcm_enc_update_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
+                                                     le_ctr, ghash_acc,
+                                                     src, dst, datalen);
                 else if (flags & FLAG_AVX)
                         aes_gcm_enc_update_aesni_avx(AES_GCM_KEY_AESNI(key),
                                                      le_ctr, ghash_acc,
@@ -1034,14 +1062,14 @@ aes_gcm_update(const struct aes_gcm_key *key,
                         aes_gcm_enc_update_aesni(AES_GCM_KEY_AESNI(key), le_ctr,
                                                  ghash_acc, src, dst, datalen);
         } else {
-                if (flags & FLAG_AVX10_512)
-                        aes_gcm_dec_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
-                                                          le_ctr, ghash_acc,
-                                                          src, dst, datalen);
-                else if (flags & FLAG_AVX10_256)
-                        aes_gcm_dec_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key),
-                                                          le_ctr, ghash_acc,
-                                                          src, dst, datalen);
+                if (flags & FLAG_VAES_AVX512)
+                        aes_gcm_dec_update_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+                                                       le_ctr, ghash_acc,
+                                                       src, dst, datalen);
+                else if (flags & FLAG_VAES_AVX2)
+                        aes_gcm_dec_update_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
+                                                     le_ctr, ghash_acc,
+                                                     src, dst, datalen);
                 else if (flags & FLAG_AVX)
                         aes_gcm_dec_update_aesni_avx(AES_GCM_KEY_AESNI(key),
                                                      le_ctr, ghash_acc,
@@ -1062,9 +1090,13 @@ aes_gcm_enc_final_aesni_avx(const struct aes_gcm_key_aesni *key,
                             const u32 le_ctr[4], u8 ghash_acc[16],
                             u64 total_aadlen, u64 total_datalen);
 asmlinkage void
-aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-                             const u32 le_ctr[4], u8 ghash_acc[16],
-                             u64 total_aadlen, u64 total_datalen);
+aes_gcm_enc_final_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
+                            const u32 le_ctr[4], u8 ghash_acc[16],
+                            u64 total_aadlen, u64 total_datalen);
+asmlinkage void
+aes_gcm_enc_final_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+                              const u32 le_ctr[4], u8 ghash_acc[16],
+                              u64 total_aadlen, u64 total_datalen);
 
 /* __always_inline to optimize out the branches based on @flags */
 static __always_inline void
@@ -1072,10 +1104,14 @@ aes_gcm_enc_final(const struct aes_gcm_key *key,
                   const u32 le_ctr[4], u8 ghash_acc[16],
                   u64 total_aadlen, u64 total_datalen, int flags)
 {
-        if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
-                aes_gcm_enc_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
-                                             le_ctr, ghash_acc,
-                                             total_aadlen, total_datalen);
+        if (flags & FLAG_VAES_AVX512)
+                aes_gcm_enc_final_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+                                              le_ctr, ghash_acc,
+                                              total_aadlen, total_datalen);
+        else if (flags & FLAG_VAES_AVX2)
+                aes_gcm_enc_final_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
+                                            le_ctr, ghash_acc,
+                                            total_aadlen, total_datalen);
         else if (flags & FLAG_AVX)
                 aes_gcm_enc_final_aesni_avx(AES_GCM_KEY_AESNI(key),
                                             le_ctr, ghash_acc,
@@ -1097,10 +1133,15 @@ aes_gcm_dec_final_aesni_avx(const struct aes_gcm_key_aesni *key,
                             u64 total_aadlen, u64 total_datalen,
                             const u8 tag[16], int taglen);
 asmlinkage bool __must_check
-aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-                             const u32 le_ctr[4], const u8 ghash_acc[16],
-                             u64 total_aadlen, u64 total_datalen,
-                             const u8 tag[16], int taglen);
+aes_gcm_dec_final_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
+                            const u32 le_ctr[4], const u8 ghash_acc[16],
+                            u64 total_aadlen, u64 total_datalen,
+                            const u8 tag[16], int taglen);
+asmlinkage bool __must_check
+aes_gcm_dec_final_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+                              const u32 le_ctr[4], const u8 ghash_acc[16],
+                              u64 total_aadlen, u64 total_datalen,
+                              const u8 tag[16], int taglen);
 
 /* __always_inline to optimize out the branches based on @flags */
 static __always_inline bool __must_check
@@ -1108,11 +1149,16 @@ aes_gcm_dec_final(const struct aes_gcm_key *key, const u32 le_ctr[4],
                   u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen,
                   u8 tag[16], int taglen, int flags)
 {
-        if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512))
-                return aes_gcm_dec_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
-                                                    le_ctr, ghash_acc,
-                                                    total_aadlen, total_datalen,
-                                                    tag, taglen);
+        if (flags & FLAG_VAES_AVX512)
+                return aes_gcm_dec_final_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+                                                     le_ctr, ghash_acc,
+                                                     total_aadlen, total_datalen,
+                                                     tag, taglen);
+        else if (flags & FLAG_VAES_AVX2)
+                return aes_gcm_dec_final_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
+                                                   le_ctr, ghash_acc,
+                                                   total_aadlen, total_datalen,
+                                                   tag, taglen);
         else if (flags & FLAG_AVX)
                 return aes_gcm_dec_final_aesni_avx(AES_GCM_KEY_AESNI(key),
                                                    le_ctr, ghash_acc,
@@ -1195,10 +1241,14 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
         BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496);
         BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624);
         BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688);
-        BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_enc) != 0);
-        BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_length) != 480);
-        BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, h_powers) != 512);
-        BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, padding) != 768);
+        BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.key_enc) != 0);
+        BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.key_length) != 480);
+        BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers) != 512);
+        BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers_xored) != 640);
+        BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_enc) != 0);
+        BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_length) != 480);
+        BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, h_powers) != 512);
+        BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, padding) != 768);
 
         if (likely(crypto_simd_usable())) {
                 err = aes_check_keylen(keylen);
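The constants in these checks follow from the key layouts added earlier in this diff. A rough back-of-the-envelope check (illustrative only; it assumes the usual x86 crypto_aes_ctx layout of u32 key_enc[60], u32 key_dec[60], u32 key_length, which is not shown in this diff):

/*
 * Illustrative layout arithmetic, not code from the patch:
 *
 *   base.aes_key.key_enc    at offset   0   (60 * 4 = 240 bytes)
 *   base.aes_key.key_dec    at offset 240   (another 240 bytes)
 *   base.aes_key.key_length at offset 480   (matches the checks above)
 *   h_powers                at offset 512   (first suitably aligned offset
 *                                            past the end of the base struct)
 *
 * aes_gcm_key_vaes_avx2:   8 powers * 16 bytes = 128, so h_powers_xored
 *                          lands at 512 + 128 = 640.
 * aes_gcm_key_vaes_avx512: 16 powers * 16 bytes = 256, so padding lands
 *                          at 512 + 256 = 768.
 */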
@@ -1231,8 +1281,9 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
                 gf128mul_lle(&h, (const be128 *)x_to_the_minus1);
 
                 /* Compute the needed key powers */
-                if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) {
-                        struct aes_gcm_key_avx10 *k = AES_GCM_KEY_AVX10(key);
+                if (flags & FLAG_VAES_AVX512) {
+                        struct aes_gcm_key_vaes_avx512 *k =
+                                AES_GCM_KEY_VAES_AVX512(key);
 
                         for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) {
                                 k->h_powers[i][0] = be64_to_cpu(h.b);
@@ -1240,6 +1291,22 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
                                 gf128mul_lle(&h, &h1);
                         }
                         memset(k->padding, 0, sizeof(k->padding));
+                } else if (flags & FLAG_VAES_AVX2) {
+                        struct aes_gcm_key_vaes_avx2 *k =
+                                AES_GCM_KEY_VAES_AVX2(key);
+                        static const u8 indices[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
+
+                        for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) {
+                                k->h_powers[i][0] = be64_to_cpu(h.b);
+                                k->h_powers[i][1] = be64_to_cpu(h.a);
+                                gf128mul_lle(&h, &h1);
+                        }
+                        for (i = 0; i < ARRAY_SIZE(k->h_powers_xored); i++) {
+                                int j = indices[i];
+
+                                k->h_powers_xored[i] = k->h_powers[j][0] ^
+                                                       k->h_powers[j][1];
+                        }
                 } else {
                         struct aes_gcm_key_aesni *k = AES_GCM_KEY_AESNI(key);
 
@@ -1508,15 +1575,15 @@ DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX,
                 "generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx",
                 AES_GCM_KEY_AESNI_SIZE, 500);
 
-/* aes_gcm_algs_vaes_avx10_256 */
-DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
-                "generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256",
-                AES_GCM_KEY_AVX10_SIZE, 700);
+/* aes_gcm_algs_vaes_avx2 */
+DEFINE_GCM_ALGS(vaes_avx2, FLAG_VAES_AVX2,
+                "generic-gcm-vaes-avx2", "rfc4106-gcm-vaes-avx2",
+                AES_GCM_KEY_VAES_AVX2_SIZE, 600);
 
-/* aes_gcm_algs_vaes_avx10_512 */
-DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
-                "generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
-                AES_GCM_KEY_AVX10_SIZE, 800);
+/* aes_gcm_algs_vaes_avx512 */
+DEFINE_GCM_ALGS(vaes_avx512, FLAG_VAES_AVX512,
+                "generic-gcm-vaes-avx512", "rfc4106-gcm-vaes-avx512",
+                AES_GCM_KEY_VAES_AVX512_SIZE, 800);
 
 static int __init register_avx_algs(void)
 {
@@ -1548,6 +1615,10 @@ static int __init register_avx_algs(void)
                                           ARRAY_SIZE(skcipher_algs_vaes_avx2));
         if (err)
                 return err;
+        err = crypto_register_aeads(aes_gcm_algs_vaes_avx2,
+                                    ARRAY_SIZE(aes_gcm_algs_vaes_avx2));
+        if (err)
+                return err;
 
         if (!boot_cpu_has(X86_FEATURE_AVX512BW) ||
             !boot_cpu_has(X86_FEATURE_AVX512VL) ||
@@ -1556,26 +1627,21 @@ static int __init register_avx_algs(void)
                                XFEATURE_MASK_AVX512, NULL))
                 return 0;
 
-        err = crypto_register_aeads(aes_gcm_algs_vaes_avx10_256,
-                                    ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256));
-        if (err)
-                return err;
-
         if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
                 int i;
 
                 for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx512); i++)
                         skcipher_algs_vaes_avx512[i].base.cra_priority = 1;
-                for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
-                        aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
+                for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx512); i++)
+                        aes_gcm_algs_vaes_avx512[i].base.cra_priority = 1;
         }
 
         err = crypto_register_skciphers(skcipher_algs_vaes_avx512,
                                         ARRAY_SIZE(skcipher_algs_vaes_avx512));
         if (err)
                 return err;
-        err = crypto_register_aeads(aes_gcm_algs_vaes_avx10_512,
-                                    ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512));
+        err = crypto_register_aeads(aes_gcm_algs_vaes_avx512,
+                                    ARRAY_SIZE(aes_gcm_algs_vaes_avx512));
         if (err)
                 return err;
 
@@ -1595,8 +1661,8 @@ static void unregister_avx_algs(void)
         unregister_aeads(aes_gcm_algs_aesni_avx);
         unregister_skciphers(skcipher_algs_vaes_avx2);
         unregister_skciphers(skcipher_algs_vaes_avx512);
-        unregister_aeads(aes_gcm_algs_vaes_avx10_256);
-        unregister_aeads(aes_gcm_algs_vaes_avx10_512);
+        unregister_aeads(aes_gcm_algs_vaes_avx2);
+        unregister_aeads(aes_gcm_algs_vaes_avx512);
 }
 #else /* CONFIG_X86_64 */
 static struct aead_alg aes_gcm_algs_aesni[0];