diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 376d6b50743f..bdd276a6e540 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -47,16 +47,6 @@ config CRYPTO_SM3_ARM64_CE Architecture: arm64 using: - ARMv8.2 Crypto Extensions -config CRYPTO_POLYVAL_ARM64_CE - tristate "Hash functions: POLYVAL (ARMv8 Crypto Extensions)" - depends on KERNEL_MODE_NEON - select CRYPTO_POLYVAL - help - POLYVAL hash function for HCTR2 - - Architecture: arm64 using: - - ARMv8 Crypto Extensions - config CRYPTO_AES_ARM64 tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS" select CRYPTO_AES diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index fd3d590fa113..1e330aa08d3f 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -29,9 +29,6 @@ sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o -obj-$(CONFIG_CRYPTO_POLYVAL_ARM64_CE) += polyval-ce.o -polyval-ce-y := polyval-ce-glue.o polyval-ce-core.o - obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o diff --git a/arch/arm64/crypto/polyval-ce-glue.c b/arch/arm64/crypto/polyval-ce-glue.c deleted file mode 100644 index c4e653688ea0..000000000000 --- a/arch/arm64/crypto/polyval-ce-glue.c +++ /dev/null @@ -1,158 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Glue code for POLYVAL using ARMv8 Crypto Extensions - * - * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen - * Copyright (c) 2009 Intel Corp. - * Author: Huang Ying - * Copyright 2021 Google LLC - */ - -/* - * Glue code based on ghash-clmulni-intel_glue.c. - * - * This implementation of POLYVAL uses montgomery multiplication accelerated by - * ARMv8 Crypto Extensions instructions to implement the finite field operations. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NUM_KEY_POWERS 8 - -struct polyval_tfm_ctx { - /* - * These powers must be in the order h^8, ..., h^1. - */ - u8 key_powers[NUM_KEY_POWERS][POLYVAL_BLOCK_SIZE]; -}; - -struct polyval_desc_ctx { - u8 buffer[POLYVAL_BLOCK_SIZE]; -}; - -asmlinkage void pmull_polyval_update(const struct polyval_tfm_ctx *keys, - const u8 *in, size_t nblocks, u8 *accumulator); -asmlinkage void pmull_polyval_mul(u8 *op1, const u8 *op2); - -static void internal_polyval_update(const struct polyval_tfm_ctx *keys, - const u8 *in, size_t nblocks, u8 *accumulator) -{ - kernel_neon_begin(); - pmull_polyval_update(keys, in, nblocks, accumulator); - kernel_neon_end(); -} - -static void internal_polyval_mul(u8 *op1, const u8 *op2) -{ - kernel_neon_begin(); - pmull_polyval_mul(op1, op2); - kernel_neon_end(); -} - -static int polyval_arm64_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - struct polyval_tfm_ctx *tctx = crypto_shash_ctx(tfm); - int i; - - if (keylen != POLYVAL_BLOCK_SIZE) - return -EINVAL; - - memcpy(tctx->key_powers[NUM_KEY_POWERS-1], key, POLYVAL_BLOCK_SIZE); - - for (i = NUM_KEY_POWERS-2; i >= 0; i--) { - memcpy(tctx->key_powers[i], key, POLYVAL_BLOCK_SIZE); - internal_polyval_mul(tctx->key_powers[i], - tctx->key_powers[i+1]); - } - - return 0; -} - -static int polyval_arm64_init(struct shash_desc *desc) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - - memset(dctx, 0, sizeof(*dctx)); - - return 0; -} - -static int polyval_arm64_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - unsigned int nblocks; - - do { - /* allow rescheduling every 4K bytes */ - nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE; - internal_polyval_update(tctx, src, nblocks, dctx->buffer); - srclen -= nblocks * POLYVAL_BLOCK_SIZE; - src += nblocks * POLYVAL_BLOCK_SIZE; - } while (srclen >= POLYVAL_BLOCK_SIZE); - - return srclen; -} - -static int polyval_arm64_finup(struct shash_desc *desc, const u8 *src, - unsigned int len, u8 *dst) -{ - struct polyval_desc_ctx *dctx = shash_desc_ctx(desc); - const struct polyval_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - - if (len) { - crypto_xor(dctx->buffer, src, len); - internal_polyval_mul(dctx->buffer, - tctx->key_powers[NUM_KEY_POWERS-1]); - } - - memcpy(dst, dctx->buffer, POLYVAL_BLOCK_SIZE); - - return 0; -} - -static struct shash_alg polyval_alg = { - .digestsize = POLYVAL_DIGEST_SIZE, - .init = polyval_arm64_init, - .update = polyval_arm64_update, - .finup = polyval_arm64_finup, - .setkey = polyval_arm64_setkey, - .descsize = sizeof(struct polyval_desc_ctx), - .base = { - .cra_name = "polyval", - .cra_driver_name = "polyval-ce", - .cra_priority = 200, - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, - .cra_blocksize = POLYVAL_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct polyval_tfm_ctx), - .cra_module = THIS_MODULE, - }, -}; - -static int __init polyval_ce_mod_init(void) -{ - return crypto_register_shash(&polyval_alg); -} - -static void __exit polyval_ce_mod_exit(void) -{ - crypto_unregister_shash(&polyval_alg); -} - -module_cpu_feature_match(PMULL, polyval_ce_mod_init) -module_exit(polyval_ce_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("POLYVAL hash function accelerated by ARMv8 Crypto Extensions"); -MODULE_ALIAS_CRYPTO("polyval"); -MODULE_ALIAS_CRYPTO("polyval-ce"); diff --git a/include/crypto/polyval.h b/include/crypto/polyval.h index 5ba4c248cad1..f8aaf4275fbd 100644 --- a/include/crypto/polyval.h +++ b/include/crypto/polyval.h @@ -39,10 +39,18 @@ struct polyval_elem { * This may contain just the raw key H, or it may contain precomputed key * powers, depending on the platform's POLYVAL implementation. Use * polyval_preparekey() to initialize this. + * + * By H^i we mean H^(i-1) * H * x^-128, with base case H^1 = H. I.e. the + * exponentiation repeats the POLYVAL dot operation, with its "extra" x^-128. */ struct polyval_key { #ifdef CONFIG_CRYPTO_LIB_POLYVAL_ARCH +#ifdef CONFIG_ARM64 + /** @h_powers: Powers of the hash key H^8 through H^1 */ + struct polyval_elem h_powers[8]; +#else #error "Unhandled arch" +#endif #else /* CONFIG_CRYPTO_LIB_POLYVAL_ARCH */ /** @h: The hash key H */ struct polyval_elem h; diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 6545f0e83b83..430723994142 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -144,6 +144,7 @@ config CRYPTO_LIB_POLYVAL config CRYPTO_LIB_POLYVAL_ARCH bool depends on CRYPTO_LIB_POLYVAL && !UML + default y if ARM64 && KERNEL_MODE_NEON config CRYPTO_LIB_CHACHA20POLY1305 tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 055e44008805..2efa96afcb4b 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -202,6 +202,7 @@ obj-$(CONFIG_CRYPTO_LIB_POLYVAL) += libpolyval.o libpolyval-y := polyval.o ifeq ($(CONFIG_CRYPTO_LIB_POLYVAL_ARCH),y) CFLAGS_polyval.o += -I$(src)/$(SRCARCH) +libpolyval-$(CONFIG_ARM64) += arm64/polyval-ce-core.o endif ################################################################################ diff --git a/arch/arm64/crypto/polyval-ce-core.S b/lib/crypto/arm64/polyval-ce-core.S similarity index 92% rename from arch/arm64/crypto/polyval-ce-core.S rename to lib/crypto/arm64/polyval-ce-core.S index b5326540d2e3..7c731a044d02 100644 --- a/arch/arm64/crypto/polyval-ce-core.S +++ b/lib/crypto/arm64/polyval-ce-core.S @@ -27,10 +27,10 @@ #include #define STRIDE_BLOCKS 8 -KEY_POWERS .req x0 -MSG .req x1 -BLOCKS_LEFT .req x2 -ACCUMULATOR .req x3 +ACCUMULATOR .req x0 +KEY_POWERS .req x1 +MSG .req x2 +BLOCKS_LEFT .req x3 KEY_START .req x10 EXTRA_BYTES .req x11 TMP .req x13 @@ -300,15 +300,12 @@ GSTAR .req v24 .endm /* - * Perform montgomery multiplication in GF(2^128) and store result in op1. + * Computes a = a * b * x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1. * - * Computes op1*op2*x^{-128} mod x^128 + x^127 + x^126 + x^121 + 1 - * If op1, op2 are in montgomery form, this computes the montgomery - * form of op1*op2. - * - * void pmull_polyval_mul(u8 *op1, const u8 *op2); + * void polyval_mul_pmull(struct polyval_elem *a, + * const struct polyval_elem *b); */ -SYM_FUNC_START(pmull_polyval_mul) +SYM_FUNC_START(polyval_mul_pmull) adr TMP, .Lgstar ld1 {GSTAR.2d}, [TMP] ld1 {v0.16b}, [x0] @@ -318,22 +315,23 @@ SYM_FUNC_START(pmull_polyval_mul) montgomery_reduction SUM st1 {SUM.16b}, [x0] ret -SYM_FUNC_END(pmull_polyval_mul) +SYM_FUNC_END(polyval_mul_pmull) /* * Perform polynomial evaluation as specified by POLYVAL. This computes: * h^n * accumulator + h^n * m_0 + ... + h^1 * m_{n-1} * where n=nblocks, h is the hash key, and m_i are the message blocks. * - * x0 - pointer to precomputed key powers h^8 ... h^1 - * x1 - pointer to message blocks - * x2 - number of blocks to hash - * x3 - pointer to accumulator + * x0 - pointer to accumulator + * x1 - pointer to precomputed key powers h^8 ... h^1 + * x2 - pointer to message blocks + * x3 - number of blocks to hash * - * void pmull_polyval_update(const struct polyval_ctx *ctx, const u8 *in, - * size_t nblocks, u8 *accumulator); + * void polyval_blocks_pmull(struct polyval_elem *acc, + * const struct polyval_key *key, + * const u8 *data, size_t nblocks); */ -SYM_FUNC_START(pmull_polyval_update) +SYM_FUNC_START(polyval_blocks_pmull) adr TMP, .Lgstar mov KEY_START, KEY_POWERS ld1 {GSTAR.2d}, [TMP] @@ -358,4 +356,4 @@ SYM_FUNC_START(pmull_polyval_update) .LskipPartial: st1 {SUM.16b}, [ACCUMULATOR] ret -SYM_FUNC_END(pmull_polyval_update) +SYM_FUNC_END(polyval_blocks_pmull) diff --git a/lib/crypto/arm64/polyval.h b/lib/crypto/arm64/polyval.h new file mode 100644 index 000000000000..2486e80750d0 --- /dev/null +++ b/lib/crypto/arm64/polyval.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * POLYVAL library functions, arm64 optimized + * + * Copyright 2025 Google LLC + */ +#include +#include +#include + +#define NUM_H_POWERS 8 + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull); + +asmlinkage void polyval_mul_pmull(struct polyval_elem *a, + const struct polyval_elem *b); +asmlinkage void polyval_blocks_pmull(struct polyval_elem *acc, + const struct polyval_key *key, + const u8 *data, size_t nblocks); + +static void polyval_preparekey_arch(struct polyval_key *key, + const u8 raw_key[POLYVAL_BLOCK_SIZE]) +{ + static_assert(ARRAY_SIZE(key->h_powers) == NUM_H_POWERS); + memcpy(&key->h_powers[NUM_H_POWERS - 1], raw_key, POLYVAL_BLOCK_SIZE); + if (static_branch_likely(&have_pmull) && may_use_simd()) { + kernel_neon_begin(); + for (int i = NUM_H_POWERS - 2; i >= 0; i--) { + key->h_powers[i] = key->h_powers[i + 1]; + polyval_mul_pmull(&key->h_powers[i], + &key->h_powers[NUM_H_POWERS - 1]); + } + kernel_neon_end(); + } else { + for (int i = NUM_H_POWERS - 2; i >= 0; i--) { + key->h_powers[i] = key->h_powers[i + 1]; + polyval_mul_generic(&key->h_powers[i], + &key->h_powers[NUM_H_POWERS - 1]); + } + } +} + +static void polyval_mul_arch(struct polyval_elem *acc, + const struct polyval_key *key) +{ + if (static_branch_likely(&have_pmull) && may_use_simd()) { + kernel_neon_begin(); + polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]); + kernel_neon_end(); + } else { + polyval_mul_generic(acc, &key->h_powers[NUM_H_POWERS - 1]); + } +} + +static void polyval_blocks_arch(struct polyval_elem *acc, + const struct polyval_key *key, + const u8 *data, size_t nblocks) +{ + if (static_branch_likely(&have_pmull) && may_use_simd()) { + do { + /* Allow rescheduling every 4 KiB. */ + size_t n = min_t(size_t, nblocks, + 4096 / POLYVAL_BLOCK_SIZE); + + kernel_neon_begin(); + polyval_blocks_pmull(acc, key, data, n); + kernel_neon_end(); + data += n * POLYVAL_BLOCK_SIZE; + nblocks -= n; + } while (nblocks); + } else { + polyval_blocks_generic(acc, &key->h_powers[NUM_H_POWERS - 1], + data, nblocks); + } +} + +#define polyval_mod_init_arch polyval_mod_init_arch +static void polyval_mod_init_arch(void) +{ + if (cpu_have_named_feature(PMULL)) + static_branch_enable(&have_pmull); +}