crypto: x86/aes-gcm - rename avx10 and avx10_512 to avx512
With the "avx10_256" code removed and the AVX10 specification having been changed to basically just be a re-packaged AVX512, the "avx10_512" name no longer makes sense. Replace it with "avx512". While doing this, also add the "vaes_" prefix in places that didn't already have it. The result is that the two VAES-optimized implementations are consistently called vaes_avx2 and vaes_avx512.

(Also drop the "-x86_64" part of the assembly filename, to keep it from getting too long. There's no 32-bit version of this code, and the fact that it's 64-bit is unremarkable; it's the norm for new code.)

Note: although aes_gcm_aad_update_vaes_avx512() (previously called aes_gcm_aad_update_vaes_avx10()) uses at most 256-bit vectors, it still depends on the AVX512 CPU feature, so its new name is still accurate. Also, a later commit will make it sometimes use 512-bit vectors anyway.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20251002023117.37504-4-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
@@ -47,8 +47,8 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
 aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \
			       aes-gcm-aesni-x86_64.o \
			       aes-gcm-vaes-avx2.o \
-			       aes-xts-avx-x86_64.o \
-			       aes-gcm-avx10-x86_64.o
+			       aes-gcm-vaes-avx512.o \
+			       aes-xts-avx-x86_64.o
 
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
@@ -61,15 +61,15 @@
 // for the *_aesni functions or AVX for the *_aesni_avx ones.  (But it seems
 // there are no CPUs that support AES-NI without also PCLMULQDQ and SSE4.1.)
 //
-// The design generally follows that of aes-gcm-avx10-x86_64.S, and that file is
+// The design generally follows that of aes-gcm-vaes-avx512.S, and that file is
 // more thoroughly commented.  This file has the following notable changes:
 //
 // - The vector length is fixed at 128-bit, i.e. xmm registers.  This means
 //   there is only one AES block (and GHASH block) per register.
 //
-// - Without AVX512 / AVX10, only 16 SIMD registers are available instead of
-//   32.  We work around this by being much more careful about using
-//   registers, relying heavily on loads to load values as they are needed.
+// - Without AVX512, only 16 SIMD registers are available instead of 32.  We
+//   work around this by being much more careful about using registers,
+//   relying heavily on loads to load values as they are needed.
 //
 // - Masking is not available either.  We work around this by implementing
 //   partial block loads and stores using overlapping scalar loads and stores
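Editor's note: the masking-free partial-block technique mentioned in the comment above can be sketched in plain C. This is only an illustration of the idea (the helper below is hypothetical and portable; the real code does the equivalent in assembly with scalar and xmm loads/stores): two overlapping fixed-size accesses cover exactly the valid bytes, so nothing outside the buffer is ever touched.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Hypothetical sketch: load `len` (1..15) bytes from `src` into a zeroed
 * 16-byte block without reading past src + len and without a masked load.
 * Two overlapping fixed-size copies cover exactly the valid bytes; on x86
 * each memcpy below compiles to a single scalar load plus store.
 */
static void partial_block_load(uint8_t block[16], const uint8_t *src, size_t len)
{
	memset(block, 0, 16);
	if (len >= 8) {
		memcpy(block, src, 8);				/* first 8 bytes */
		memcpy(block + len - 8, src + len - 8, 8);	/* last 8, may overlap */
	} else if (len >= 4) {
		memcpy(block, src, 4);
		memcpy(block + len - 4, src + len - 4, 4);
	} else if (len > 0) {
		block[0] = src[0];
		block[len / 2] = src[len / 2];			/* covers len == 2 and 3 */
		block[len - 1] = src[len - 1];
	}
}

A partial store works the same way in reverse: the output block is assembled in a register and written back with the same pair of overlapping accesses.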
@@ -90,8 +90,8 @@
 //   multiplication instead of schoolbook multiplication.  This saves one
 //   pclmulqdq instruction per block, at the cost of one 64-bit load, one
 //   pshufd, and 0.25 pxors per block.  (This is without the three-argument
-//   XOR support that would be provided by AVX512 / AVX10, which would be
-//   more beneficial to schoolbook than Karatsuba.)
+//   XOR support that would be provided by AVX512, which would be more
+//   beneficial to schoolbook than Karatsuba.)
 //
 //   As a rough approximation, we can assume that Karatsuba multiplication is
 //   faster than schoolbook multiplication in this context if one pshufd and
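Editor's note: for readers unfamiliar with the trade-off weighed above, here is a user-space sketch of both GHASH multiplication strategies using PCLMULQDQ intrinsics (compile with -mpclmul -msse2). It computes only the 256-bit carryless product and omits the GF(2^128) reduction; it is not the kernel's code. Schoolbook needs four pclmulqdq per block, Karatsuba needs three plus extra xors and a shuffle.

#include <immintrin.h>

/* Schoolbook: 256-bit carryless product of a and b, four pclmulqdq. */
static void clmul_schoolbook(__m128i a, __m128i b, __m128i *lo, __m128i *hi)
{
	__m128i a0b0 = _mm_clmulepi64_si128(a, b, 0x00);	/* a.lo * b.lo */
	__m128i a0b1 = _mm_clmulepi64_si128(a, b, 0x10);	/* a.lo * b.hi */
	__m128i a1b0 = _mm_clmulepi64_si128(a, b, 0x01);	/* a.hi * b.lo */
	__m128i a1b1 = _mm_clmulepi64_si128(a, b, 0x11);	/* a.hi * b.hi */
	__m128i mid  = _mm_xor_si128(a0b1, a1b0);

	*lo = _mm_xor_si128(a0b0, _mm_slli_si128(mid, 8));
	*hi = _mm_xor_si128(a1b1, _mm_srli_si128(mid, 8));
}

/* Karatsuba: same product with three pclmulqdq, using
 * (a1^a0)(b1^b0) = a1b1 ^ a1b0 ^ a0b1 ^ a0b0 to recover the middle term. */
static void clmul_karatsuba(__m128i a, __m128i b, __m128i *lo, __m128i *hi)
{
	__m128i a0b0  = _mm_clmulepi64_si128(a, b, 0x00);
	__m128i a1b1  = _mm_clmulepi64_si128(a, b, 0x11);
	__m128i afold = _mm_xor_si128(a, _mm_shuffle_epi32(a, 0x4e));	/* a1^a0 */
	__m128i bfold = _mm_xor_si128(b, _mm_shuffle_epi32(b, 0x4e));	/* b1^b0 */
	__m128i m     = _mm_clmulepi64_si128(afold, bfold, 0x00);
	__m128i mid   = _mm_xor_si128(m, _mm_xor_si128(a0b0, a1b1));

	*lo = _mm_xor_si128(a0b0, _mm_slli_si128(mid, 8));
	*hi = _mm_xor_si128(a1b1, _mm_srli_si128(mid, 8));
}

In the kernel's VAES code the folded halves of the key powers are precomputed at setkey time (the h_powers_xored field visible in the glue-code hunks below), which is why the per-block overhead quoted above is only a 64-bit load, a pshufd, and a fraction of a pxor rather than a recomputation.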
@@ -49,12 +49,12 @@
 //
 // -----------------------------------------------------------------------------
 //
-// This is similar to aes-gcm-avx10-x86_64.S, but it uses AVX2 instead of
-// AVX512.  This means it can only use 16 vector registers instead of 32, the
-// maximum vector length is 32 bytes, and some instructions such as vpternlogd
-// and masked loads/stores are unavailable.  However, it is able to run on CPUs
-// that have VAES without AVX512, namely AMD Zen 3 (including "Milan" server
-// CPUs), various Intel client CPUs such as Alder Lake, and Intel Sierra Forest.
+// This is similar to aes-gcm-vaes-avx512.S, but it uses AVX2 instead of AVX512.
+// This means it can only use 16 vector registers instead of 32, the maximum
+// vector length is 32 bytes, and some instructions such as vpternlogd and
+// masked loads/stores are unavailable.  However, it is able to run on CPUs that
+// have VAES without AVX512, namely AMD Zen 3 (including "Milan" server CPUs),
+// various Intel client CPUs such as Alder Lake, and Intel Sierra Forest.
 //
 // This implementation also uses Karatsuba multiplication instead of schoolbook
 // multiplication for GHASH in its main loop.  This does not help much on Intel,
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
 //
-// VAES and VPCLMULQDQ optimized AES-GCM for x86_64
+// AES-GCM implementation for x86_64 CPUs that support the following CPU
+// features: VAES && VPCLMULQDQ && AVX512BW && AVX512VL && BMI2
 //
 // Copyright 2024 Google LLC
 //
@@ -45,41 +46,6 @@
 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.
 //
-//------------------------------------------------------------------------------
-//
-// This file implements AES-GCM (Galois/Counter Mode) for x86_64 CPUs that
-// support VAES (vector AES), VPCLMULQDQ (vector carryless multiplication), and
-// either AVX512 or AVX10.  Some of the functions, notably the encryption and
-// decryption update functions which are the most performance-critical, are
-// provided in two variants generated from a macro: one using 256-bit vectors
-// (suffix: vaes_avx10_256) and one using 512-bit vectors (vaes_avx10_512).  The
-// other, "shared" functions (vaes_avx10) use at most 256-bit vectors.
-//
-// The functions that use 512-bit vectors are intended for CPUs that support
-// 512-bit vectors *and* where using them doesn't cause significant
-// downclocking.  They require the following CPU features:
-//
-//	VAES && VPCLMULQDQ && BMI2 && ((AVX512BW && AVX512VL) || AVX10/512)
-//
-// The other functions require the following CPU features:
-//
-//	VAES && VPCLMULQDQ && BMI2 && ((AVX512BW && AVX512VL) || AVX10/256)
-//
-// All functions use the "System V" ABI.  The Windows ABI is not supported.
-//
-// Note that we use "avx10" in the names of the functions as a shorthand to
-// really mean "AVX10 or a certain set of AVX512 features".  Due to Intel's
-// introduction of AVX512 and then its replacement by AVX10, there doesn't seem
-// to be a simple way to name things that makes sense on all CPUs.
-//
-// Note that the macros that support both 256-bit and 512-bit vectors could
-// fairly easily be changed to support 128-bit too.  However, this would *not*
-// be sufficient to allow the code to run on CPUs without AVX512 or AVX10,
-// because the code heavily uses several features of these extensions other than
-// the vector length: the increase in the number of SIMD registers from 16 to
-// 32, masking support, and new instructions such as vpternlogd (which can do a
-// three-argument XOR).  These features are very useful for AES-GCM.
 
 #include <linux/linkage.h>
@@ -312,7 +278,7 @@
 	vpternlogd	$0x96, \t0, \mi, \hi
 .endm
 
-// void aes_gcm_precompute_##suffix(struct aes_gcm_key_avx10 *key);
+// void aes_gcm_precompute_vaes_avx512(struct aes_gcm_key_vaes_avx512 *key);
 //
 // Given the expanded AES key |key->aes_key|, this function derives the GHASH
 // subkey and initializes |key->ghash_key_powers| with powers of it.
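Editor's note: as background for what this precompute step produces, here is a slow but self-contained reference for GHASH key powers, following the bit-by-bit multiplication from the GCM specification. It is not the kernel's code (the kernel computes the same values with PCLMULQDQ and stores them in the byte order and layout the assembly expects); the helper names and the power ordering below are assumptions for illustration only.

#include <stdint.h>
#include <string.h>

/* Multiply two GHASH field elements (16-byte big-endian strings under GCM's
 * reflected bit convention) with the bit-by-bit algorithm from the GCM spec. */
static void ghash_mul(uint8_t r[16], const uint8_t x[16], const uint8_t y[16])
{
	uint8_t z[16] = { 0 };
	uint8_t v[16];

	memcpy(v, y, 16);
	for (int i = 0; i < 128; i++) {
		/* bit i of x, numbered from the MSB of x[0] */
		if ((x[i / 8] >> (7 - (i % 8))) & 1)
			for (int j = 0; j < 16; j++)
				z[j] ^= v[j];
		/* v *= x: "right shift" in GCM's convention, then reduce */
		int lsb = v[15] & 1;
		for (int j = 15; j > 0; j--)
			v[j] = (v[j] >> 1) | (v[j - 1] << 7);
		v[0] >>= 1;
		if (lsb)
			v[0] ^= 0xe1;	/* x^128 + x^7 + x^2 + x + 1 */
	}
	memcpy(r, z, 16);
}

/* Fill h_powers[] with H^1 .. H^n, where H = AES-ENC_K(0^128) is supplied
 * by the caller.  Ordering and layout here are assumptions, not the kernel's. */
static void ghash_key_powers(uint8_t (*h_powers)[16], int n, const uint8_t h[16])
{
	uint8_t acc[16];

	memcpy(acc, h, 16);
	for (int i = 0; i < n; i++) {
		memcpy(h_powers[i], acc, 16);
		ghash_mul(acc, acc, h);		/* acc = acc * H */
	}
}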
@@ -588,9 +554,9 @@
 	vmovdqu8	GHASHDATA3, 3*VL(DST)
 .endm
 
-// void aes_gcm_{enc,dec}_update_##suffix(const struct aes_gcm_key_avx10 *key,
-//					   const u32 le_ctr[4], u8 ghash_acc[16],
-//					   const u8 *src, u8 *dst, int datalen);
+// void aes_gcm_{enc,dec}_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+//					      const u32 le_ctr[4], u8 ghash_acc[16],
+//					      const u8 *src, u8 *dst, int datalen);
 //
 // This macro generates a GCM encryption or decryption update function with the
 // above prototype (with \enc selecting which one).  This macro supports both
@@ -944,14 +910,14 @@
 	RET
 .endm
 
-// void aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-//				     const u32 le_ctr[4], u8 ghash_acc[16],
-//				     u64 total_aadlen, u64 total_datalen);
-// bool aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-//				     const u32 le_ctr[4],
-//				     const u8 ghash_acc[16],
-//				     u64 total_aadlen, u64 total_datalen,
-//				     const u8 tag[16], int taglen);
+// void aes_gcm_enc_final_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+//				      const u32 le_ctr[4], u8 ghash_acc[16],
+//				      u64 total_aadlen, u64 total_datalen);
+// bool aes_gcm_dec_final_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+//				      const u32 le_ctr[4],
+//				      const u8 ghash_acc[16],
+//				      u64 total_aadlen, u64 total_datalen,
+//				      const u8 tag[16], int taglen);
 //
 // This macro generates one of the above two functions (with \enc selecting
 // which one).  Both functions finish computing the GCM authentication tag by
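Editor's note: to make those prototypes a little more concrete, recall that per the GCM specification the final step folds a "lengths block" (the bit lengths of the AAD and of the data, each as a 64-bit big-endian value) into GHASH and then encrypts the accumulator with the first counter block to form the tag; the decryption variant additionally compares the computed tag with the expected one in constant time and returns a bool. The helpers below are an illustrative, self-contained sketch of those two details only, not the kernel's implementation.

#include <stdint.h>

/* Build GCM's final lengths block: 64-bit big-endian bit counts of the AAD
 * and of the en/decrypted data, absorbed into GHASH before the accumulator
 * is encrypted to produce the tag. */
static void gcm_lengths_block(uint8_t block[16], uint64_t total_aadlen,
			      uint64_t total_datalen)
{
	uint64_t aad_bits = total_aadlen * 8;
	uint64_t data_bits = total_datalen * 8;

	for (int i = 0; i < 8; i++) {
		block[7 - i] = (uint8_t)(aad_bits >> (8 * i));
		block[15 - i] = (uint8_t)(data_bits >> (8 * i));
	}
}

/* Constant-time tag comparison, in the spirit of what a dec_final routine
 * must do before returning its bool: accumulate differences over all taglen
 * bytes instead of returning at the first mismatch. */
static int gcm_tag_equal(const uint8_t *computed, const uint8_t *expected,
			 int taglen)
{
	uint8_t diff = 0;

	for (int i = 0; i < taglen; i++)
		diff |= computed[i] ^ expected[i];
	return diff == 0;
}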
@@ -1082,19 +1048,19 @@
 .endm
 
 _set_veclen 64
-SYM_FUNC_START(aes_gcm_precompute_vaes_avx10_512)
+SYM_FUNC_START(aes_gcm_precompute_vaes_avx512)
 	_aes_gcm_precompute
-SYM_FUNC_END(aes_gcm_precompute_vaes_avx10_512)
-SYM_FUNC_START(aes_gcm_enc_update_vaes_avx10_512)
+SYM_FUNC_END(aes_gcm_precompute_vaes_avx512)
+SYM_FUNC_START(aes_gcm_enc_update_vaes_avx512)
 	_aes_gcm_update	1
-SYM_FUNC_END(aes_gcm_enc_update_vaes_avx10_512)
-SYM_FUNC_START(aes_gcm_dec_update_vaes_avx10_512)
+SYM_FUNC_END(aes_gcm_enc_update_vaes_avx512)
+SYM_FUNC_START(aes_gcm_dec_update_vaes_avx512)
 	_aes_gcm_update	0
-SYM_FUNC_END(aes_gcm_dec_update_vaes_avx10_512)
+SYM_FUNC_END(aes_gcm_dec_update_vaes_avx512)
 
-// void aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-//				      u8 ghash_acc[16],
-//				      const u8 *aad, int aadlen);
+// void aes_gcm_aad_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+//					u8 ghash_acc[16],
+//					const u8 *aad, int aadlen);
 //
 // This function processes the AAD (Additional Authenticated Data) in GCM.
 // Using the key |key|, it updates the GHASH accumulator |ghash_acc| with the
@@ -1110,7 +1076,7 @@ SYM_FUNC_END(aes_gcm_dec_update_vaes_avx10_512)
 // VEX-coded instructions instead of EVEX-coded to save some instruction bytes.
 // To optimize for large amounts of AAD, we could implement a 4x-wide loop and
 // provide a version using 512-bit vectors, but that doesn't seem to be useful.
-SYM_FUNC_START(aes_gcm_aad_update_vaes_avx10)
+SYM_FUNC_START(aes_gcm_aad_update_vaes_avx512)
 
 	// Function arguments
 	.set	KEY,		%rdi
@@ -1178,11 +1144,11 @@ SYM_FUNC_START(aes_gcm_aad_update_vaes_avx10)
 
 	vzeroupper	// This is needed after using ymm or zmm registers.
 	RET
-SYM_FUNC_END(aes_gcm_aad_update_vaes_avx10)
+SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512)
 
-SYM_FUNC_START(aes_gcm_enc_final_vaes_avx10)
+SYM_FUNC_START(aes_gcm_enc_final_vaes_avx512)
 	_aes_gcm_final	1
-SYM_FUNC_END(aes_gcm_enc_final_vaes_avx10)
-SYM_FUNC_START(aes_gcm_dec_final_vaes_avx10)
+SYM_FUNC_END(aes_gcm_enc_final_vaes_avx512)
+SYM_FUNC_START(aes_gcm_dec_final_vaes_avx512)
 	_aes_gcm_final	0
-SYM_FUNC_END(aes_gcm_dec_final_vaes_avx10)
+SYM_FUNC_END(aes_gcm_dec_final_vaes_avx512)
@@ -904,8 +904,8 @@ struct aes_gcm_key_vaes_avx2 {
 #define AES_GCM_KEY_VAES_AVX2_SIZE	\
 	(sizeof(struct aes_gcm_key_vaes_avx2) + (31 & ~(CRYPTO_MINALIGN - 1)))
 
-/* Key struct used by the VAES + AVX10 implementations of AES-GCM */
-struct aes_gcm_key_avx10 {
+/* Key struct used by the VAES + AVX512 implementation of AES-GCM */
+struct aes_gcm_key_vaes_avx512 {
 	/*
 	 * Common part of the key.  The assembly code prefers 16-byte alignment
 	 * for the round keys; we get this by them being located at the start of
@@ -925,10 +925,10 @@ struct aes_gcm_key_avx10 {
 	/* Three padding blocks required by the assembly code */
 	u64 padding[3][2];
 };
-#define AES_GCM_KEY_AVX10(key)	\
-	container_of((key), struct aes_gcm_key_avx10, base)
-#define AES_GCM_KEY_AVX10_SIZE	\
-	(sizeof(struct aes_gcm_key_avx10) + (63 & ~(CRYPTO_MINALIGN - 1)))
+#define AES_GCM_KEY_VAES_AVX512(key)	\
+	container_of((key), struct aes_gcm_key_vaes_avx512, base)
+#define AES_GCM_KEY_VAES_AVX512_SIZE	\
+	(sizeof(struct aes_gcm_key_vaes_avx512) + (63 & ~(CRYPTO_MINALIGN - 1)))
 
 /*
  * These flags are passed to the AES-GCM helper functions to specify the
@@ -941,12 +941,12 @@ struct aes_gcm_key_avx10 {
 #define FLAG_ENC		BIT(1)
 #define FLAG_AVX		BIT(2)
 #define FLAG_VAES_AVX2		BIT(3)
-#define FLAG_AVX10_512		BIT(4)
+#define FLAG_VAES_AVX512	BIT(4)
 
 static inline struct aes_gcm_key *
 aes_gcm_key_get(struct crypto_aead *tfm, int flags)
 {
-	if (flags & FLAG_AVX10_512)
+	if (flags & FLAG_VAES_AVX512)
 		return PTR_ALIGN(crypto_aead_ctx(tfm), 64);
 	else if (flags & FLAG_VAES_AVX2)
 		return PTR_ALIGN(crypto_aead_ctx(tfm), 32);
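Editor's note: the PTR_ALIGN(..., 64) above pairs with the AES_GCM_KEY_VAES_AVX512_SIZE definition in the previous hunk: the context is over-allocated by 63 & ~(CRYPTO_MINALIGN - 1) bytes so that rounding the (at least CRYPTO_MINALIGN-aligned) context pointer up to a 64-byte boundary always leaves room for the whole struct. A small stand-alone sketch of that arithmetic, with an assumed CRYPTO_MINALIGN of 8 and a made-up struct size:

#include <stdint.h>
#include <stdio.h>

#define CRYPTO_MINALIGN 8UL	/* assumption for this sketch only */

static uintptr_t align_up(uintptr_t p, uintptr_t a)
{
	return (p + a - 1) & ~(a - 1);	/* what PTR_ALIGN(p, a) evaluates to */
}

int main(void)
{
	size_t struct_size = 832;			/* hypothetical sizeof(key struct) */
	size_t headroom = 63 & ~(CRYPTO_MINALIGN - 1);	/* 56 with MINALIGN == 8 */
	size_t alloc = struct_size + headroom;
	/* Worst case: the allocation is only CRYPTO_MINALIGN-aligned. */
	uintptr_t ctx = 0x1008;
	uintptr_t key = align_up(ctx, 64);

	printf("skip %zu bytes, headroom %zu, still fits: %d\n",
	       (size_t)(key - ctx), headroom,
	       key + struct_size <= ctx + alloc);
	return 0;
}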
@@ -961,12 +961,12 @@ aes_gcm_precompute_aesni_avx(struct aes_gcm_key_aesni *key);
 asmlinkage void
 aes_gcm_precompute_vaes_avx2(struct aes_gcm_key_vaes_avx2 *key);
 asmlinkage void
-aes_gcm_precompute_vaes_avx10_512(struct aes_gcm_key_avx10 *key);
+aes_gcm_precompute_vaes_avx512(struct aes_gcm_key_vaes_avx512 *key);
 
 static void aes_gcm_precompute(struct aes_gcm_key *key, int flags)
 {
-	if (flags & FLAG_AVX10_512)
-		aes_gcm_precompute_vaes_avx10_512(AES_GCM_KEY_AVX10(key));
+	if (flags & FLAG_VAES_AVX512)
+		aes_gcm_precompute_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key));
 	else if (flags & FLAG_VAES_AVX2)
 		aes_gcm_precompute_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key));
 	else if (flags & FLAG_AVX)
@@ -985,15 +985,15 @@ asmlinkage void
 aes_gcm_aad_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
			     u8 ghash_acc[16], const u8 *aad, int aadlen);
 asmlinkage void
-aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-			      u8 ghash_acc[16], const u8 *aad, int aadlen);
+aes_gcm_aad_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+			       u8 ghash_acc[16], const u8 *aad, int aadlen);
 
 static void aes_gcm_aad_update(const struct aes_gcm_key *key, u8 ghash_acc[16],
			       const u8 *aad, int aadlen, int flags)
 {
-	if (flags & FLAG_AVX10_512)
-		aes_gcm_aad_update_vaes_avx10(AES_GCM_KEY_AVX10(key), ghash_acc,
-					      aad, aadlen);
+	if (flags & FLAG_VAES_AVX512)
+		aes_gcm_aad_update_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+					       ghash_acc, aad, aadlen);
 	else if (flags & FLAG_VAES_AVX2)
 		aes_gcm_aad_update_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
					     ghash_acc, aad, aadlen);
@@ -1018,9 +1018,9 @@ aes_gcm_enc_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
			     const u32 le_ctr[4], u8 ghash_acc[16],
			     const u8 *src, u8 *dst, int datalen);
 asmlinkage void
-aes_gcm_enc_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
-				  const u32 le_ctr[4], u8 ghash_acc[16],
-				  const u8 *src, u8 *dst, int datalen);
+aes_gcm_enc_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+			       const u32 le_ctr[4], u8 ghash_acc[16],
+			       const u8 *src, u8 *dst, int datalen);
 
 asmlinkage void
 aes_gcm_dec_update_aesni(const struct aes_gcm_key_aesni *key,
@@ -1035,9 +1035,9 @@ aes_gcm_dec_update_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
			     const u32 le_ctr[4], u8 ghash_acc[16],
			     const u8 *src, u8 *dst, int datalen);
 asmlinkage void
-aes_gcm_dec_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key,
-				  const u32 le_ctr[4], u8 ghash_acc[16],
-				  const u8 *src, u8 *dst, int datalen);
+aes_gcm_dec_update_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+			       const u32 le_ctr[4], u8 ghash_acc[16],
+			       const u8 *src, u8 *dst, int datalen);
 
 /* __always_inline to optimize out the branches based on @flags */
 static __always_inline void
@@ -1046,10 +1046,10 @@ aes_gcm_update(const struct aes_gcm_key *key,
	       const u8 *src, u8 *dst, int datalen, int flags)
 {
	if (flags & FLAG_ENC) {
-		if (flags & FLAG_AVX10_512)
-			aes_gcm_enc_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
-							  le_ctr, ghash_acc,
-							  src, dst, datalen);
+		if (flags & FLAG_VAES_AVX512)
+			aes_gcm_enc_update_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+						       le_ctr, ghash_acc,
+						       src, dst, datalen);
		else if (flags & FLAG_VAES_AVX2)
			aes_gcm_enc_update_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
						     le_ctr, ghash_acc,
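Editor's note: the dispatch in this and the surrounding hunks relies on the comment seen just above, "__always_inline to optimize out the branches based on @flags". Every caller passes a compile-time-constant flags value, so inlining lets the compiler delete all but one branch and each algorithm variant ends up calling its assembly routine directly. A stand-alone sketch of the pattern (the impl_* functions are stand-ins, not kernel symbols; the flag values mirror the #defines in the hunks above):

#include <stdio.h>

#define FLAG_VAES_AVX2		(1 << 3)
#define FLAG_VAES_AVX512	(1 << 4)

static void impl_vaes_avx512(void) { puts("avx512 path"); }
static void impl_vaes_avx2(void)   { puts("avx2 path"); }
static void impl_generic(void)     { puts("generic path"); }

/* Mirrors the shape of aes_gcm_update(): one dispatcher, forced inline,
 * so a constant flags argument collapses it to a single direct call. */
static inline __attribute__((always_inline)) void update(int flags)
{
	if (flags & FLAG_VAES_AVX512)
		impl_vaes_avx512();
	else if (flags & FLAG_VAES_AVX2)
		impl_vaes_avx2();
	else
		impl_generic();
}

/* Each wrapper passes a constant, so after inlining only one call remains. */
void update_vaes_avx512_path(void) { update(FLAG_VAES_AVX512); }
void update_vaes_avx2_path(void)   { update(FLAG_VAES_AVX2); }
void update_generic_path(void)     { update(0); }

int main(void)
{
	update_vaes_avx512_path();
	update_vaes_avx2_path();
	update_generic_path();
	return 0;
}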
@@ -1062,10 +1062,10 @@ aes_gcm_update(const struct aes_gcm_key *key,
			aes_gcm_enc_update_aesni(AES_GCM_KEY_AESNI(key), le_ctr,
						 ghash_acc, src, dst, datalen);
	} else {
-		if (flags & FLAG_AVX10_512)
-			aes_gcm_dec_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key),
-							  le_ctr, ghash_acc,
-							  src, dst, datalen);
+		if (flags & FLAG_VAES_AVX512)
+			aes_gcm_dec_update_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+						       le_ctr, ghash_acc,
+						       src, dst, datalen);
		else if (flags & FLAG_VAES_AVX2)
			aes_gcm_dec_update_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
						     le_ctr, ghash_acc,
@@ -1094,9 +1094,9 @@ aes_gcm_enc_final_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
			    const u32 le_ctr[4], u8 ghash_acc[16],
			    u64 total_aadlen, u64 total_datalen);
 asmlinkage void
-aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-			     const u32 le_ctr[4], u8 ghash_acc[16],
-			     u64 total_aadlen, u64 total_datalen);
+aes_gcm_enc_final_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+			      const u32 le_ctr[4], u8 ghash_acc[16],
+			      u64 total_aadlen, u64 total_datalen);
 
 /* __always_inline to optimize out the branches based on @flags */
 static __always_inline void
@@ -1104,10 +1104,10 @@ aes_gcm_enc_final(const struct aes_gcm_key *key,
		  const u32 le_ctr[4], u8 ghash_acc[16],
		  u64 total_aadlen, u64 total_datalen, int flags)
 {
-	if (flags & FLAG_AVX10_512)
-		aes_gcm_enc_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
-					     le_ctr, ghash_acc,
-					     total_aadlen, total_datalen);
+	if (flags & FLAG_VAES_AVX512)
+		aes_gcm_enc_final_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+					      le_ctr, ghash_acc,
+					      total_aadlen, total_datalen);
	else if (flags & FLAG_VAES_AVX2)
		aes_gcm_enc_final_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
					    le_ctr, ghash_acc,
@@ -1138,10 +1138,10 @@ aes_gcm_dec_final_vaes_avx2(const struct aes_gcm_key_vaes_avx2 *key,
			    u64 total_aadlen, u64 total_datalen,
			    const u8 tag[16], int taglen);
 asmlinkage bool __must_check
-aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key,
-			     const u32 le_ctr[4], const u8 ghash_acc[16],
-			     u64 total_aadlen, u64 total_datalen,
-			     const u8 tag[16], int taglen);
+aes_gcm_dec_final_vaes_avx512(const struct aes_gcm_key_vaes_avx512 *key,
+			      const u32 le_ctr[4], const u8 ghash_acc[16],
+			      u64 total_aadlen, u64 total_datalen,
+			      const u8 tag[16], int taglen);
 
 /* __always_inline to optimize out the branches based on @flags */
 static __always_inline bool __must_check
@@ -1149,11 +1149,11 @@ aes_gcm_dec_final(const struct aes_gcm_key *key, const u32 le_ctr[4],
		  u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen,
		  u8 tag[16], int taglen, int flags)
 {
-	if (flags & FLAG_AVX10_512)
-		return aes_gcm_dec_final_vaes_avx10(AES_GCM_KEY_AVX10(key),
-						    le_ctr, ghash_acc,
-						    total_aadlen, total_datalen,
-						    tag, taglen);
+	if (flags & FLAG_VAES_AVX512)
+		return aes_gcm_dec_final_vaes_avx512(AES_GCM_KEY_VAES_AVX512(key),
+						     le_ctr, ghash_acc,
+						     total_aadlen, total_datalen,
+						     tag, taglen);
	else if (flags & FLAG_VAES_AVX2)
		return aes_gcm_dec_final_vaes_avx2(AES_GCM_KEY_VAES_AVX2(key),
						   le_ctr, ghash_acc,
@@ -1245,10 +1245,10 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.key_length) != 480);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers) != 512);
	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers_xored) != 640);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_enc) != 0);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_length) != 480);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, h_powers) != 512);
-	BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, padding) != 768);
+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_enc) != 0);
+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_length) != 480);
+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, h_powers) != 512);
+	BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, padding) != 768);
 
	if (likely(crypto_simd_usable())) {
		err = aes_check_keylen(keylen);
@@ -1281,8 +1281,9 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
		gf128mul_lle(&h, (const be128 *)x_to_the_minus1);
 
		/* Compute the needed key powers */
-		if (flags & FLAG_AVX10_512) {
-			struct aes_gcm_key_avx10 *k = AES_GCM_KEY_AVX10(key);
+		if (flags & FLAG_VAES_AVX512) {
+			struct aes_gcm_key_vaes_avx512 *k =
+				AES_GCM_KEY_VAES_AVX512(key);
 
			for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) {
				k->h_powers[i][0] = be64_to_cpu(h.b);
@@ -1579,10 +1580,10 @@ DEFINE_GCM_ALGS(vaes_avx2, FLAG_VAES_AVX2,
		"generic-gcm-vaes-avx2", "rfc4106-gcm-vaes-avx2",
		AES_GCM_KEY_VAES_AVX2_SIZE, 600);
 
-/* aes_gcm_algs_vaes_avx10_512 */
-DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
-		"generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
-		AES_GCM_KEY_AVX10_SIZE, 800);
+/* aes_gcm_algs_vaes_avx512 */
+DEFINE_GCM_ALGS(vaes_avx512, FLAG_VAES_AVX512,
+		"generic-gcm-vaes-avx512", "rfc4106-gcm-vaes-avx512",
+		AES_GCM_KEY_VAES_AVX512_SIZE, 800);
 
 static int __init register_avx_algs(void)
 {
@@ -1631,16 +1632,16 @@ static int __init register_avx_algs(void)
 
		for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx512); i++)
			skcipher_algs_vaes_avx512[i].base.cra_priority = 1;
-		for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
-			aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
+		for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx512); i++)
+			aes_gcm_algs_vaes_avx512[i].base.cra_priority = 1;
	}
 
	err = crypto_register_skciphers(skcipher_algs_vaes_avx512,
					ARRAY_SIZE(skcipher_algs_vaes_avx512));
	if (err)
		return err;
-	err = crypto_register_aeads(aes_gcm_algs_vaes_avx10_512,
-				    ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512));
+	err = crypto_register_aeads(aes_gcm_algs_vaes_avx512,
+				    ARRAY_SIZE(aes_gcm_algs_vaes_avx512));
	if (err)
		return err;
 
@@ -1661,7 +1662,7 @@ static void unregister_avx_algs(void)
	unregister_skciphers(skcipher_algs_vaes_avx2);
	unregister_skciphers(skcipher_algs_vaes_avx512);
	unregister_aeads(aes_gcm_algs_vaes_avx2);
-	unregister_aeads(aes_gcm_algs_vaes_avx10_512);
+	unregister_aeads(aes_gcm_algs_vaes_avx512);
 }
 #else /* CONFIG_X86_64 */
 static struct aead_alg aes_gcm_algs_aesni[0];