Merge branch 'bpf-nested-rcu-critical-sections'
Puranjay Mohan says:

====================
bpf: Nested rcu critical sections

v1: https://lore.kernel.org/bpf/20250916113622.19540-1-puranjay@kernel.org/

Changes in v1->v2:
- Move the addition of new tests to a separate patch (Alexei)
- Avoid incrementing active_rcu_locks at two places (Eduard)

Support nested rcu critical sections by turning the boolean flag
active_rcu_lock into a counter, active_rcu_locks, and using it to manage
rcu critical section state. bpf_rcu_read_lock() increments the counter
and bpf_rcu_read_unlock() decrements it; the MEM_RCU -> PTR_UNTRUSTED
transition happens when active_rcu_locks drops to 0.
====================

Link: https://patch.msgid.link/20251117200411.25563-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
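Concretely, the diff below makes program shapes like the following pass the verifier, where the old code rejected the second bpf_rcu_read_lock() with "nested rcu read lock". This is a minimal sketch in selftest style, not part of the patch: the section and program names are illustrative, the kfunc declarations are assumed to match the selftests', and the selftests additionally use a SYS_PREFIX macro for the arch-dependent syscall symbol prefix.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* kfunc declarations, assumed to match the BPF selftests */
extern void bpf_rcu_read_lock(void) __ksym;
extern void bpf_rcu_read_unlock(void) __ksym;

char _license[] SEC("license") = "GPL";

SEC("?fentry.s/sys_getpgid")	/* selftests spell this with SYS_PREFIX */
int nested_rcu_ok(void *ctx)
{
	struct task_struct *task = bpf_get_current_task_btf();
	struct task_struct *real_parent;

	bpf_rcu_read_lock();
	bpf_rcu_read_lock();		/* active_rcu_locks: 1 -> 2 */
	real_parent = task->real_parent; /* MEM_RCU, valid inside the region */
	(void)real_parent;
	bpf_rcu_read_unlock();		/* 2 -> 1: still inside an RCU CS,
					 * real_parent stays MEM_RCU */
	bpf_rcu_read_unlock();		/* 1 -> 0: MEM_RCU regs become
					 * PTR_UNTRUSTED */
	return 0;
}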
@@ -416,7 +416,7 @@ struct bpf_verifier_state {
 	u32 active_irq_id;
 	u32 active_lock_id;
 	void *active_lock_ptr;
-	bool active_rcu_lock;
+	u32 active_rcu_locks;
 
 	bool speculative;
 	bool in_sleepable;
@@ -1437,7 +1437,7 @@ static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf
 	dst->acquired_refs = src->acquired_refs;
 	dst->active_locks = src->active_locks;
 	dst->active_preempt_locks = src->active_preempt_locks;
-	dst->active_rcu_lock = src->active_rcu_lock;
+	dst->active_rcu_locks = src->active_rcu_locks;
 	dst->active_irq_id = src->active_irq_id;
 	dst->active_lock_id = src->active_lock_id;
 	dst->active_lock_ptr = src->active_lock_ptr;
@@ -5889,7 +5889,7 @@ static bool in_sleepable(struct bpf_verifier_env *env)
  */
 static bool in_rcu_cs(struct bpf_verifier_env *env)
 {
-	return env->cur_state->active_rcu_lock ||
+	return env->cur_state->active_rcu_locks ||
 	       env->cur_state->active_locks ||
 	       !in_sleepable(env);
 }
@@ -10744,7 +10744,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	}
 
 	if (env->subprog_info[subprog].might_sleep &&
-	    (env->cur_state->active_rcu_lock || env->cur_state->active_preempt_locks ||
+	    (env->cur_state->active_rcu_locks || env->cur_state->active_preempt_locks ||
 	     env->cur_state->active_irq_id || !in_sleepable(env))) {
 		verbose(env, "global functions that may sleep are not allowed in non-sleepable context,\n"
 			     "i.e., in a RCU/IRQ/preempt-disabled section, or in\n"
@@ -11327,7 +11327,7 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit
 		return -EINVAL;
 	}
 
-	if (check_lock && env->cur_state->active_rcu_lock) {
+	if (check_lock && env->cur_state->active_rcu_locks) {
 		verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix);
 		return -EINVAL;
 	}
@@ -11465,7 +11465,7 @@ static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
 /* Check if we're in a sleepable context. */
 static inline bool in_sleepable_context(struct bpf_verifier_env *env)
 {
-	return !env->cur_state->active_rcu_lock &&
+	return !env->cur_state->active_rcu_locks &&
 	       !env->cur_state->active_preempt_locks &&
 	       !env->cur_state->active_irq_id &&
 	       in_sleepable(env);
@@ -11531,7 +11531,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		return err;
 	}
 
-	if (env->cur_state->active_rcu_lock) {
+	if (env->cur_state->active_rcu_locks) {
 		if (fn->might_sleep) {
 			verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
 				func_id_name(func_id), func_id);
@@ -14038,36 +14038,33 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
 	preempt_enable = is_kfunc_bpf_preempt_enable(&meta);
 
-	if (env->cur_state->active_rcu_lock) {
+	if (rcu_lock) {
+		env->cur_state->active_rcu_locks++;
+	} else if (rcu_unlock) {
 		struct bpf_func_state *state;
 		struct bpf_reg_state *reg;
 		u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
 
-		if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
-			verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
-			return -EACCES;
-		}
-
-		if (rcu_lock) {
-			verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
+		if (env->cur_state->active_rcu_locks == 0) {
+			verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
 			return -EINVAL;
-		} else if (rcu_unlock) {
+		}
+		if (--env->cur_state->active_rcu_locks == 0) {
 			bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
 				if (reg->type & MEM_RCU) {
 					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
 					reg->type |= PTR_UNTRUSTED;
 				}
 			}));
-			env->cur_state->active_rcu_lock = false;
-		} else if (sleepable) {
-			verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
-			return -EACCES;
 		}
-	} else if (rcu_lock) {
-		env->cur_state->active_rcu_lock = true;
-	} else if (rcu_unlock) {
-		verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
-		return -EINVAL;
+	} else if (sleepable && env->cur_state->active_rcu_locks) {
+		verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
+		return -EACCES;
 	}
 
+	if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
+		verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
+		return -EACCES;
+	}
+
 	if (env->cur_state->active_preempt_locks) {
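Stripped of the register-state bookkeeping, the hunk above is a small counter state machine. Here is a standalone sketch of it in plain userspace C, written for this page rather than taken from the kernel; the function names are illustrative and the error returns mirror the verifier's:

#include <stdio.h>

static unsigned int active_rcu_locks;	/* models the bpf_verifier_state field */

static int rcu_lock(void)
{
	active_rcu_locks++;	/* nesting is now just a deeper count */
	return 0;
}

static int rcu_unlock(void)
{
	if (active_rcu_locks == 0) {
		fprintf(stderr, "unmatched rcu read unlock\n");
		return -1;	/* the verifier returns -EINVAL here */
	}
	if (--active_rcu_locks == 0)
		printf("outermost region closed: MEM_RCU -> PTR_UNTRUSTED\n");
	return 0;
}

int main(void)
{
	rcu_lock();	/* depth 1 */
	rcu_lock();	/* depth 2: previously "nested rcu read lock" error */
	rcu_unlock();	/* depth 1: RCU pointers stay trusted */
	rcu_unlock();	/* depth 0: RCU pointers demoted to untrusted */
	rcu_unlock();	/* one too many: rejected, as unbalanced_1 below expects */
	return 0;
}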
@@ -19387,7 +19384,7 @@ static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *c
 	if (old->active_preempt_locks != cur->active_preempt_locks)
 		return false;
 
-	if (old->active_rcu_lock != cur->active_rcu_lock)
+	if (old->active_rcu_locks != cur->active_rcu_locks)
 		return false;
 
 	if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
@@ -28,6 +28,7 @@ static void test_success(void)
 	bpf_program__set_autoload(skel->progs.two_regions, true);
 	bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
 	bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+	bpf_program__set_autoload(skel->progs.nested_rcu_region, true);
 	bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
 	bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
 	bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
@@ -78,7 +79,8 @@ static const char * const inproper_region_tests[] = {
 	"non_sleepable_rcu_mismatch",
 	"inproper_sleepable_helper",
 	"inproper_sleepable_kfunc",
-	"nested_rcu_region",
+	"nested_rcu_region_unbalanced_1",
+	"nested_rcu_region_unbalanced_2",
 	"rcu_read_lock_global_subprog_lock",
 	"rcu_read_lock_global_subprog_unlock",
 	"rcu_read_lock_sleepable_helper_global_subprog",
@@ -278,6 +278,46 @@ out:
 	return 0;
 }
 
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_1(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* nested rcu read lock regions */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	if (!real_parent)
+		goto out;
+	(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_2(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* nested rcu read lock regions */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	if (!real_parent)
+		goto out;
+	(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
 SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
 int task_trusted_non_rcuptr(void *ctx)
 {