arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack

Commit aefbab8e77 ("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")

added a 'kernel_fpsimd_state' field to struct thread_struct, which is
the arch-specific portion of struct task_struct, and is allocated for
each task in the system. At 528 bytes, this field bloats task_struct
non-negligibly, and the memory overhead may impact performance on
systems with many processes.

This allocation is only used if the task is scheduled out or interrupted
by a softirq while using the FP/SIMD unit in kernel mode, and so it is
possible to transparently allocate this buffer on the caller's stack
instead.

So tweak the 'ksimd' scoped guard implementation so that a stack buffer
is allocated and passed to both kernel_neon_begin() and
kernel_neon_end(), which either record it in the task struct, or use it
directly to preserve the task's kernel mode FP/SIMD state when running
in softirq context. Passing the address to both functions, and checking
the addresses for consistency, ensures that callers of the updated bare
begin/end API use it in a manner that is consistent with the new
context switch semantics.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Author: Ard Biesheuvel
Date:   2025-10-01 13:59:42 +02:00
Parent: 103728a716
Commit: 4fa617cc68

5 changed files with 55 additions and 21 deletions

arch/arm64/include/asm/fpu.h

@@ -15,12 +15,12 @@ static inline void kernel_fpu_begin(void)
 {
 	BUG_ON(!in_task());
 	preempt_disable();
-	kernel_neon_begin();
+	kernel_neon_begin(NULL);
 }
 
 static inline void kernel_fpu_end(void)
 {
-	kernel_neon_end();
+	kernel_neon_end(NULL);
 	preempt_enable();
 }
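
Passing NULL here is deliberate and safe: kernel_fpu_begin() asserts in_task() and disables preemption, so these calls always occur in non-preemptible task context, the one case where the updated kernel_neon_begin() kerneldoc below says no preserve buffer is required.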

arch/arm64/include/asm/neon.h

@@ -13,7 +13,7 @@
 
 #define cpu_has_neon()		system_supports_fpsimd()
 
-void kernel_neon_begin(void);
-void kernel_neon_end(void);
+void kernel_neon_begin(struct user_fpsimd_state *);
+void kernel_neon_end(struct user_fpsimd_state *);
 
 #endif /* ! __ASM_NEON_H */
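
With the new prototypes, a user of the bare begin/end API is expected to place the preserve buffer on its own stack and pass the same pointer to both calls. A minimal sketch of a hypothetical caller (not part of this patch):

	struct user_fpsimd_state buf;

	if (may_use_simd()) {
		kernel_neon_begin(&buf);
		/* FP/SIMD registers may be used here */
		kernel_neon_end(&buf);
	}

A mismatched pointer trips the consistency WARN_ON() added to kernel_neon_end() below.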

arch/arm64/include/asm/processor.h

@@ -172,7 +172,12 @@ struct thread_struct {
 	unsigned long		fault_code;	/* ESR_EL1 value */
 	struct debug_info	debug;		/* debugging */
 
-	struct user_fpsimd_state	kernel_fpsimd_state;
+	/*
+	 * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
+	 * address of a caller provided buffer that will be used to preserve a
+	 * task's kernel mode FPSIMD state while it is scheduled out.
+	 */
+	struct user_fpsimd_state	*kernel_fpsimd_state;
 	unsigned int		kernel_fpsimd_cpu;
 #ifdef CONFIG_ARM64_PTR_AUTH
 	struct ptrauth_keys_user	keys_user;

arch/arm64/include/asm/simd.h

@@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
 
 #endif /* ! CONFIG_KERNEL_MODE_NEON */
 
-DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
+DEFINE_LOCK_GUARD_1(ksimd,
+		    struct user_fpsimd_state,
+		    kernel_neon_begin(_T->lock),
+		    kernel_neon_end(_T->lock))
 
-#define scoped_ksimd()	scoped_guard(ksimd)
+#define scoped_ksimd()	scoped_guard(ksimd, &(struct user_fpsimd_state){})
 
 #endif
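
Since the guard now carries a pointer payload, scoped_ksimd() supplies the address of an anonymous compound literal that lives on the caller's stack frame. Going by the generic DEFINE_LOCK_GUARD_1() semantics from <linux/cleanup.h>, a use such as

	scoped_ksimd() {
		/* FP/SIMD code */
	}

is roughly equivalent to the following open-coded sketch (the __buf name is illustrative only):

	{
		struct user_fpsimd_state __buf = {};

		kernel_neon_begin(&__buf);
		/* FP/SIMD code */
		kernel_neon_end(&__buf);
	}

which is how existing scoped_ksimd() users pick up the stack buffer transparently, as promised in the commit message.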

arch/arm64/kernel/fpsimd.c

@@ -1489,21 +1489,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
 	 * Elide the load if this CPU holds the most recent kernel mode
 	 * FPSIMD context of the current task.
 	 */
-	if (last->st == &task->thread.kernel_fpsimd_state &&
+	if (last->st == task->thread.kernel_fpsimd_state &&
 	    task->thread.kernel_fpsimd_cpu == smp_processor_id())
 		return;
 
-	fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+	fpsimd_load_state(task->thread.kernel_fpsimd_state);
 }
 
 static void fpsimd_save_kernel_state(struct task_struct *task)
 {
 	struct cpu_fp_state cpu_fp_state = {
-		.st		= &task->thread.kernel_fpsimd_state,
+		.st		= task->thread.kernel_fpsimd_state,
 		.to_save	= FP_STATE_FPSIMD,
 	};
 
-	fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+	BUG_ON(!cpu_fp_state.st);
+
+	fpsimd_save_state(task->thread.kernel_fpsimd_state);
 	fpsimd_bind_state_to_cpu(&cpu_fp_state);
 
 	task->thread.kernel_fpsimd_cpu = smp_processor_id();
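
The added BUG_ON() enforces that a buffer was recorded before kernel mode state is saved at a context switch: a NULL pointer here would mean kernel_neon_begin() was passed NULL from a context that can be scheduled out, the misuse the new WARN_ON() in kernel_neon_begin() below is meant to flag early.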
@@ -1774,6 +1776,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 void fpsimd_flush_task_state(struct task_struct *t)
 {
 	t->thread.fpsimd_cpu = NR_CPUS;
+	t->thread.kernel_fpsimd_state = NULL;
 	/*
 	 * If we don't support fpsimd, bail out after we have
 	 * reset the fpsimd_cpu for this task and clear the
@@ -1833,12 +1836,19 @@ void fpsimd_save_and_flush_cpu_state(void)
  *
  * The caller may freely use the FPSIMD registers until kernel_neon_end() is
  * called.
+ *
+ * Unless called from non-preemptible task context, @state must point to a
+ * caller provided buffer that will be used to preserve the task's kernel mode
+ * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
+ * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
  */
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *state)
 {
 	if (WARN_ON(!system_supports_fpsimd()))
 		return;
 
+	WARN_ON((preemptible() || in_serving_softirq()) && !state);
+
 	BUG_ON(!may_use_simd());
 
 	get_cpu_fpsimd_context();
@@ -1846,7 +1856,7 @@ void kernel_neon_begin(void)
 	/* Save unsaved fpsimd state, if any: */
 	if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
 		BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
-		fpsimd_save_kernel_state(current);
+		fpsimd_save_state(state);
 	} else {
 		fpsimd_save_user_state();
@@ -1867,8 +1877,16 @@ void kernel_neon_begin(void)
 		 * mode in task context. So in this case, setting the flag here
 		 * is always appropriate.
 		 */
-		if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+			/*
+			 * Record the caller provided buffer as the kernel mode
+			 * FP/SIMD buffer for this task, so that the state can
+			 * be preserved and restored on a context switch.
+			 */
+			WARN_ON(current->thread.kernel_fpsimd_state != NULL);
+			current->thread.kernel_fpsimd_state = state;
 			set_thread_flag(TIF_KERNEL_FPSTATE);
+		}
 	}
 
 	/* Invalidate any task state remaining in the fpsimd regs: */
@@ -1886,22 +1904,30 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
  *
  * The caller must not use the FPSIMD registers after this function is called,
  * unless kernel_neon_begin() is called again in the meantime.
+ *
+ * The value of @state must match the value passed to the preceding call to
+ * kernel_neon_begin().
  */
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *state)
 {
 	if (!system_supports_fpsimd())
 		return;
 
+	if (!test_thread_flag(TIF_KERNEL_FPSTATE))
+		return;
+
 	/*
 	 * If we are returning from a nested use of kernel mode FPSIMD, restore
 	 * the task context kernel mode FPSIMD state. This can only happen when
 	 * running in softirq context on non-PREEMPT_RT.
 	 */
-	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
-	    test_thread_flag(TIF_KERNEL_FPSTATE))
-		fpsimd_load_kernel_state(current);
-	else
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
+		fpsimd_load_state(state);
+	} else {
 		clear_thread_flag(TIF_KERNEL_FPSTATE);
+		WARN_ON(current->thread.kernel_fpsimd_state != state);
+		current->thread.kernel_fpsimd_state = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(kernel_neon_end);
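
Taken together, the two functions implement the nesting protocol the commit message describes: kernel mode FP/SIMD use in a softirq saves the interrupted task's kernel state into the softirq caller's own stack buffer in kernel_neon_begin(), and reloads it from that same buffer in kernel_neon_end(), which is why both functions must be handed the same pointer.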
@@ -1937,7 +1963,7 @@ void __efi_fpsimd_begin(void)
 	WARN_ON(preemptible());
 
 	if (may_use_simd()) {
-		kernel_neon_begin();
+		kernel_neon_begin(&efi_fpsimd_state);
 	} else {
 		/*
 		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
@@ -1986,7 +2012,7 @@ void __efi_fpsimd_end(void)
 		return;
 
 	if (!efi_fpsimd_state_used) {
-		kernel_neon_end();
+		kernel_neon_end(&efi_fpsimd_state);
 	} else {
 		if (system_supports_sve() && efi_sve_state_used) {
 			bool ffr = true;