Merge tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:

 - Convert selftests/bpf/test_tc_edt and test_tc_tunnel from .sh to
   test_progs runner (Alexis Lothoré)

 - Convert selftests/bpf/test_xsk to test_progs runner (Bastien Curutchet)

 - Replace bpf memory allocator with kmalloc_nolock() in bpf_local_storage
   (Amery Hung), and in bpf streams and range tree (Puranjay Mohan)

 - Introduce support for indirect jumps in BPF verifier and x86 JIT
   (Anton Protopopov) and arm64 JIT (Puranjay Mohan)

 - Remove runqslower bpf tool (Hoyeon Lee)

 - Fix corner cases in the verifier to close several syzbot reports
   (Eduard Zingerman, KaFai Wan)

 - Several improvements in deadlock detection in rqspinlock
   (Kumar Kartikeya Dwivedi)

 - Implement "jmp" mode for BPF trampoline and corresponding
   DYNAMIC_FTRACE_WITH_JMP. It improves "fexit" program type performance
   from 80 M/s to 136 M/s. With Steven's Ack. (Menglong Dong)

 - Add ability to test non-linear skbs in BPF_PROG_TEST_RUN (Paul Chaignon)

 - Do not let BPF_PROG_TEST_RUN emit invalid GSO types to stack
   (Daniel Borkmann)

 - Generalize buildid reader into bpf_dynptr (Mykyta Yatsenko)

 - Optimize bpf_map_update_elem() for map-in-map types
   (Ritesh Oedayrajsingh Varma)

 - Introduce overwrite mode for BPF ring buffer (Xu Kuohai)

* tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (169 commits)
  bpf: optimize bpf_map_update_elem() for map-in-map types
  bpf: make kprobe_multi_link_prog_run always_inline
  selftests/bpf: do not hardcode target rate in test_tc_edt BPF program
  selftests/bpf: remove test_tc_edt.sh
  selftests/bpf: integrate test_tc_edt into test_progs
  selftests/bpf: rename test_tc_edt.bpf.c section to expose program type
  selftests/bpf: Add success stats to rqspinlock stress test
  rqspinlock: Precede non-head waiter queueing with AA check
  rqspinlock: Disable spinning for trylock fallback
  rqspinlock: Use trylock fallback when per-CPU rqnode is busy
  rqspinlock: Perform AA checks immediately
  rqspinlock: Enclose lock/unlock within lock entry acquisitions
  bpf: Remove runqslower tool
  selftests/bpf: Remove usage of lsm/file_alloc_security in selftest
  bpf: Disable file_alloc_security hook
  bpf: check for insn arrays in check_ptr_alignment
  bpf: force BPF_F_RDONLY_PROG on insn array creation
  bpf: Fix exclusive map memory leak
  selftests/bpf: Make CS length configurable for rqspinlock stress test
  selftests/bpf: Add lock wait time stats to rqspinlock stress test
  ...
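As an illustration (not part of the pull message itself), the new ring buffer overwrite mode is requested at map creation time via the BPF_F_RB_OVERWRITE flag added in the uapi changes below. A minimal sketch, assuming headers new enough to define the flag; the map name and size are placeholders:

    /* Illustrative BTF-defined ring buffer asking for overwrite mode. */
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_RINGBUF);
            __uint(max_entries, 256 * 1024);       /* buffer size in bytes, power of two */
            __uint(map_flags, BPF_F_RB_OVERWRITE); /* new flag introduced by this series */
    } events SEC(".maps");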
@@ -100,10 +100,26 @@ described in more detail in the footnotes.
| | | ``uretprobe.s+`` [#uprobe]_ | Yes |
+ + +----------------------------------+-----------+
| | | ``usdt+`` [#usdt]_ | |
+ + +----------------------------------+-----------+
| | | ``usdt.s+`` [#usdt]_ | Yes |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_KPROBE_MULTI`` | ``kprobe.multi+`` [#kpmulti]_ | |
+ + +----------------------------------+-----------+
| | | ``kretprobe.multi+`` [#kpmulti]_ | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_KPROBE_SESSION`` | ``kprobe.session+`` [#kpmulti]_ | |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_UPROBE_MULTI`` | ``uprobe.multi+`` [#upmul]_ | |
+ + +----------------------------------+-----------+
| | | ``uprobe.multi.s+`` [#upmul]_ | Yes |
+ + +----------------------------------+-----------+
| | | ``uretprobe.multi+`` [#upmul]_ | |
+ + +----------------------------------+-----------+
| | | ``uretprobe.multi.s+`` [#upmul]_ | Yes |
+ +----------------------------------------+----------------------------------+-----------+
| | ``BPF_TRACE_UPROBE_SESSION`` | ``uprobe.session+`` [#upmul]_ | |
+ + +----------------------------------+-----------+
| | | ``uprobe.session.s+`` [#upmul]_ | Yes |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+
| ``BPF_PROG_TYPE_LIRC_MODE2`` | ``BPF_LIRC_MODE2`` | ``lirc_mode2`` | |
+-------------------------------------------+----------------------------------------+----------------------------------+-----------+

@@ -219,6 +235,8 @@ described in more detail in the footnotes.
non-negative integer.
.. [#ksyscall] The ``ksyscall`` attach format is ``ksyscall/<syscall>``.
.. [#uprobe] The ``uprobe`` attach format is ``uprobe[.s]/<path>:<function>[+<offset>]``.
.. [#upmul] The ``uprobe.multi`` attach format is ``uprobe.multi[.s]/<path>:<function-pattern>``
where ``function-pattern`` supports ``*`` and ``?`` wildcards.
.. [#usdt] The ``usdt`` attach format is ``usdt/<path>:<provider>:<name>``.
.. [#kpmulti] The ``kprobe.multi`` attach format is ``kprobe.multi/<pattern>`` where ``pattern``
supports ``*`` and ``?`` wildcards. Valid characters for pattern are
@@ -15,8 +15,9 @@ of constant size. The size of the array is defined in ``max_entries`` at
creation time. All array elements are pre-allocated and zero initialized when
created. ``BPF_MAP_TYPE_PERCPU_ARRAY`` uses a different memory region for each
CPU whereas ``BPF_MAP_TYPE_ARRAY`` uses the same memory region. The value
stored can be of any size, however, all array elements are aligned to 8
bytes.
stored can be of any size for ``BPF_MAP_TYPE_ARRAY`` and not more than
``PCPU_MIN_UNIT_SIZE`` (32 kB) for ``BPF_MAP_TYPE_PERCPU_ARRAY``. All
array elements are aligned to 8 bytes.

Since kernel 5.5, memory mapping may be enabled for ``BPF_MAP_TYPE_ARRAY`` by
setting the flag ``BPF_F_MMAPABLE``. The map definition is page-aligned and
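As an aside, not from the patch: a minimal sketch of a BTF-defined per-CPU array whose value size stays well under the ``PCPU_MIN_UNIT_SIZE`` limit described in the updated text above; the struct and map names are placeholders.

    #include <linux/types.h>
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct counters {
            __u64 packets;
            __u64 bytes;
    };

    struct {
            __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
            __uint(max_entries, 64);
            __type(key, __u32);
            __type(value, struct counters);   /* 16 bytes per element, far below 32 kB */
    } stats SEC(".maps");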
@@ -4654,6 +4654,7 @@ F: Documentation/userspace-api/ebpf/
F: arch/*/net/*
F: include/linux/bpf*
F: include/linux/btf*
F: include/linux/buildid.h
F: include/linux/filter.h
F: include/trace/events/xdp.h
F: include/uapi/linux/bpf*
@@ -1452,6 +1452,10 @@ emit_bswap_uxt:
|
||||
emit(A64_ASR(is64, dst, dst, imm), ctx);
|
||||
break;
|
||||
|
||||
/* JUMP reg */
|
||||
case BPF_JMP | BPF_JA | BPF_X:
|
||||
emit(A64_BR(dst), ctx);
|
||||
break;
|
||||
/* JUMP off */
|
||||
case BPF_JMP | BPF_JA:
|
||||
case BPF_JMP32 | BPF_JA:
|
||||
@@ -2231,6 +2235,13 @@ skip_init_ctx:
|
||||
for (i = 0; i <= prog->len; i++)
|
||||
ctx.offset[i] *= AARCH64_INSN_SIZE;
|
||||
bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
|
||||
/*
|
||||
* The bpf_prog_update_insn_ptrs function expects offsets to
|
||||
* point to the first byte of the jitted instruction (unlike
|
||||
* the bpf_prog_fill_jited_linfo above, which, for historical
|
||||
* reasons, expects to point to the next instruction)
|
||||
*/
|
||||
bpf_prog_update_insn_ptrs(prog, ctx.offset, ctx.ro_image);
|
||||
out_off:
|
||||
if (!ro_header && priv_stack_ptr) {
|
||||
free_percpu(priv_stack_ptr);
|
||||
@@ -2923,8 +2934,9 @@ static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
|
||||
* The dummy_tramp is used to prevent another CPU from jumping to unknown
|
||||
* locations during the patching process, making the patching process easier.
|
||||
*/
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
void *old_addr, void *new_addr)
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
|
||||
enum bpf_text_poke_type new_t, void *old_addr,
|
||||
void *new_addr)
|
||||
{
|
||||
int ret;
|
||||
u32 old_insn;
|
||||
@@ -2968,14 +2980,13 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
!poking_bpf_entry))
|
||||
return -EINVAL;
|
||||
|
||||
if (poke_type == BPF_MOD_CALL)
|
||||
branch_type = AARCH64_INSN_BRANCH_LINK;
|
||||
else
|
||||
branch_type = AARCH64_INSN_BRANCH_NOLINK;
|
||||
|
||||
branch_type = old_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
|
||||
AARCH64_INSN_BRANCH_NOLINK;
|
||||
if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
|
||||
return -EFAULT;
|
||||
|
||||
branch_type = new_t == BPF_MOD_CALL ? AARCH64_INSN_BRANCH_LINK :
|
||||
AARCH64_INSN_BRANCH_NOLINK;
|
||||
if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
|
||||
return -EFAULT;
|
||||
|
||||
|
||||
@@ -1284,11 +1284,12 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
|
||||
return ret ? ERR_PTR(-EINVAL) : dst;
|
||||
}
|
||||
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
void *old_addr, void *new_addr)
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
|
||||
enum bpf_text_poke_type new_t, void *old_addr,
|
||||
void *new_addr)
|
||||
{
|
||||
int ret;
|
||||
bool is_call = (poke_type == BPF_MOD_CALL);
|
||||
bool is_call;
|
||||
u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
|
||||
u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP};
|
||||
|
||||
@@ -1298,6 +1299,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
if (!is_bpf_text_address((unsigned long)ip))
|
||||
return -ENOTSUPP;
|
||||
|
||||
is_call = old_t == BPF_MOD_CALL;
|
||||
ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call);
|
||||
if (ret)
|
||||
return ret;
|
||||
@@ -1305,6 +1307,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES))
|
||||
return -EFAULT;
|
||||
|
||||
is_call = new_t == BPF_MOD_CALL;
|
||||
ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -1107,8 +1107,9 @@ static void do_isync(void *info __maybe_unused)
|
||||
* execute isync (or some CSI) so that they don't go back into the
|
||||
* trampoline again.
|
||||
*/
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
void *old_addr, void *new_addr)
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
|
||||
enum bpf_text_poke_type new_t, void *old_addr,
|
||||
void *new_addr)
|
||||
{
|
||||
unsigned long bpf_func, bpf_func_end, size, offset;
|
||||
ppc_inst_t old_inst, new_inst;
|
||||
@@ -1119,7 +1120,6 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
bpf_func = (unsigned long)ip;
|
||||
branch_flags = poke_type == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
|
||||
|
||||
/* We currently only support poking bpf programs */
|
||||
if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) {
|
||||
@@ -1132,7 +1132,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
* an unconditional branch instruction at im->ip_after_call
|
||||
*/
|
||||
if (offset) {
|
||||
if (poke_type != BPF_MOD_JUMP) {
|
||||
if (old_t == BPF_MOD_CALL || new_t == BPF_MOD_CALL) {
|
||||
pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__,
|
||||
bpf_func);
|
||||
return -EOPNOTSUPP;
|
||||
@@ -1166,6 +1166,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
}
|
||||
|
||||
old_inst = ppc_inst(PPC_RAW_NOP());
|
||||
branch_flags = old_t == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
|
||||
if (old_addr) {
|
||||
if (is_offset_in_branch_range(ip - old_addr))
|
||||
create_branch(&old_inst, ip, (unsigned long)old_addr, branch_flags);
|
||||
@@ -1174,6 +1175,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
branch_flags);
|
||||
}
|
||||
new_inst = ppc_inst(PPC_RAW_NOP());
|
||||
branch_flags = new_t == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
|
||||
if (new_addr) {
|
||||
if (is_offset_in_branch_range(ip - new_addr))
|
||||
create_branch(&new_inst, ip, (unsigned long)new_addr, branch_flags);
|
||||
|
||||
@@ -852,17 +852,19 @@ static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call)
|
||||
return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx);
|
||||
}
|
||||
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
void *old_addr, void *new_addr)
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
|
||||
enum bpf_text_poke_type new_t, void *old_addr,
|
||||
void *new_addr)
|
||||
{
|
||||
u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS];
|
||||
bool is_call = poke_type == BPF_MOD_CALL;
|
||||
bool is_call;
|
||||
int ret;
|
||||
|
||||
if (!is_kernel_text((unsigned long)ip) &&
|
||||
!is_bpf_text_address((unsigned long)ip))
|
||||
return -ENOTSUPP;
|
||||
|
||||
is_call = old_t == BPF_MOD_CALL;
|
||||
ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call);
|
||||
if (ret)
|
||||
return ret;
|
||||
@@ -870,6 +872,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
|
||||
if (memcmp(ip, old_insns, RV_FENTRY_NBYTES))
|
||||
return -EFAULT;
|
||||
|
||||
is_call = new_t == BPF_MOD_CALL;
|
||||
ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call);
|
||||
if (ret)
|
||||
return ret;
|
||||
@@ -1131,7 +1134,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
|
||||
store_args(nr_arg_slots, args_off, ctx);
|
||||
|
||||
/* skip to actual body of traced function */
|
||||
if (flags & BPF_TRAMP_F_SKIP_FRAME)
|
||||
if (flags & BPF_TRAMP_F_ORIG_STACK)
|
||||
orig_call += RV_FENTRY_NINSNS * 4;
|
||||
|
||||
if (flags & BPF_TRAMP_F_CALL_ORIG) {
|
||||
|
||||
@@ -2412,8 +2412,9 @@ bool bpf_jit_supports_far_kfunc_call(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
||||
void *old_addr, void *new_addr)
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
|
||||
enum bpf_text_poke_type new_t, void *old_addr,
|
||||
void *new_addr)
|
||||
{
|
||||
struct bpf_plt expected_plt, current_plt, new_plt, *plt;
|
||||
struct {
|
||||
@@ -2430,7 +2431,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
||||
if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
|
||||
return -EINVAL;
|
||||
|
||||
if (t == BPF_MOD_JUMP &&
|
||||
if ((new_t == BPF_MOD_JUMP || old_t == BPF_MOD_JUMP) &&
|
||||
insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
|
||||
/*
|
||||
* The branch already points to the destination,
|
||||
|
||||
@@ -230,6 +230,7 @@ config X86
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if X86_64
|
||||
select HAVE_FTRACE_REGS_HAVING_PT_REGS if X86_64
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_JMP if X86_64
|
||||
select HAVE_SAMPLE_FTRACE_DIRECT if X86_64
|
||||
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64
|
||||
select HAVE_EBPF_JIT
|
||||
|
||||
@@ -74,7 +74,12 @@ static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
|
||||
* No need to translate into a callthunk. The trampoline does
|
||||
* the depth accounting itself.
|
||||
*/
|
||||
return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
|
||||
if (ftrace_is_jmp(addr)) {
|
||||
addr = ftrace_jmp_get(addr);
|
||||
return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
|
||||
} else {
|
||||
return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
|
||||
}
|
||||
}
|
||||
|
||||
static int ftrace_verify_code(unsigned long ip, const char *old_code)
|
||||
|
||||
@@ -285,8 +285,18 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
|
||||
ANNOTATE_NOENDBR
|
||||
RET
|
||||
|
||||
1:
|
||||
testb $1, %al
|
||||
jz 2f
|
||||
andq $0xfffffffffffffffe, %rax
|
||||
movq %rax, MCOUNT_REG_SIZE+8(%rsp)
|
||||
restore_mcount_regs
|
||||
/* Restore flags */
|
||||
popfq
|
||||
RET
|
||||
|
||||
/* Swap the flags with orig_rax */
|
||||
1: movq MCOUNT_REG_SIZE(%rsp), %rdi
|
||||
2: movq MCOUNT_REG_SIZE(%rsp), %rdi
|
||||
movq %rdi, MCOUNT_REG_SIZE-8(%rsp)
|
||||
movq %rax, MCOUNT_REG_SIZE(%rsp)
|
||||
|
||||
|
||||
@@ -597,7 +597,8 @@ static int emit_jump(u8 **pprog, void *func, void *ip)
|
||||
return emit_patch(pprog, func, ip, 0xE9);
|
||||
}
|
||||
|
||||
static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
||||
static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
|
||||
enum bpf_text_poke_type new_t,
|
||||
void *old_addr, void *new_addr)
|
||||
{
|
||||
const u8 *nop_insn = x86_nops[5];
|
||||
@@ -607,9 +608,9 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
||||
int ret;
|
||||
|
||||
memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
|
||||
if (old_addr) {
|
||||
if (old_t != BPF_MOD_NOP && old_addr) {
|
||||
prog = old_insn;
|
||||
ret = t == BPF_MOD_CALL ?
|
||||
ret = old_t == BPF_MOD_CALL ?
|
||||
emit_call(&prog, old_addr, ip) :
|
||||
emit_jump(&prog, old_addr, ip);
|
||||
if (ret)
|
||||
@@ -617,9 +618,9 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
||||
}
|
||||
|
||||
memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
|
||||
if (new_addr) {
|
||||
if (new_t != BPF_MOD_NOP && new_addr) {
|
||||
prog = new_insn;
|
||||
ret = t == BPF_MOD_CALL ?
|
||||
ret = new_t == BPF_MOD_CALL ?
|
||||
emit_call(&prog, new_addr, ip) :
|
||||
emit_jump(&prog, new_addr, ip);
|
||||
if (ret)
|
||||
@@ -640,8 +641,9 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
||||
void *old_addr, void *new_addr)
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
|
||||
enum bpf_text_poke_type new_t, void *old_addr,
|
||||
void *new_addr)
|
||||
{
|
||||
if (!is_kernel_text((long)ip) &&
|
||||
!is_bpf_text_address((long)ip))
|
||||
@@ -655,29 +657,43 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
||||
if (is_endbr(ip))
|
||||
ip += ENDBR_INSN_SIZE;
|
||||
|
||||
return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
|
||||
return __bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
|
||||
}
|
||||
|
||||
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
|
||||
|
||||
static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
|
||||
static void __emit_indirect_jump(u8 **pprog, int reg, bool ereg)
|
||||
{
|
||||
u8 *prog = *pprog;
|
||||
|
||||
if (ereg)
|
||||
EMIT1(0x41);
|
||||
|
||||
EMIT2(0xFF, 0xE0 + reg);
|
||||
|
||||
*pprog = prog;
|
||||
}
|
||||
|
||||
static void emit_indirect_jump(u8 **pprog, int bpf_reg, u8 *ip)
|
||||
{
|
||||
u8 *prog = *pprog;
|
||||
int reg = reg2hex[bpf_reg];
|
||||
bool ereg = is_ereg(bpf_reg);
|
||||
|
||||
if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) {
|
||||
OPTIMIZER_HIDE_VAR(reg);
|
||||
emit_jump(&prog, its_static_thunk(reg), ip);
|
||||
emit_jump(&prog, its_static_thunk(reg + 8*ereg), ip);
|
||||
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
|
||||
EMIT_LFENCE();
|
||||
EMIT2(0xFF, 0xE0 + reg);
|
||||
__emit_indirect_jump(&prog, reg, ereg);
|
||||
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
|
||||
OPTIMIZER_HIDE_VAR(reg);
|
||||
if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
|
||||
emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip);
|
||||
emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg + 8*ereg], ip);
|
||||
else
|
||||
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
|
||||
emit_jump(&prog, &__x86_indirect_thunk_array[reg + 8*ereg], ip);
|
||||
} else {
|
||||
EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */
|
||||
__emit_indirect_jump(&prog, reg, ereg);
|
||||
if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_MITIGATION_SLS))
|
||||
EMIT1(0xCC); /* int3 */
|
||||
}
|
||||
@@ -797,7 +813,7 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
|
||||
* rdi == ctx (1st arg)
|
||||
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
|
||||
*/
|
||||
emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
|
||||
emit_indirect_jump(&prog, BPF_REG_4 /* R4 -> rcx */, ip + (prog - start));
|
||||
|
||||
/* out: */
|
||||
ctx->tail_call_indirect_label = prog - start;
|
||||
@@ -883,12 +899,13 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
|
||||
target = array->ptrs[poke->tail_call.key];
|
||||
if (target) {
|
||||
ret = __bpf_arch_text_poke(poke->tailcall_target,
|
||||
BPF_MOD_JUMP, NULL,
|
||||
BPF_MOD_NOP, BPF_MOD_JUMP,
|
||||
NULL,
|
||||
(u8 *)target->bpf_func +
|
||||
poke->adj_off);
|
||||
BUG_ON(ret < 0);
|
||||
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
|
||||
BPF_MOD_JUMP,
|
||||
BPF_MOD_JUMP, BPF_MOD_NOP,
|
||||
(u8 *)poke->tailcall_target +
|
||||
X86_PATCH_SIZE, NULL);
|
||||
BUG_ON(ret < 0);
|
||||
@@ -2614,6 +2631,9 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
|
||||
|
||||
break;
|
||||
|
||||
case BPF_JMP | BPF_JA | BPF_X:
|
||||
emit_indirect_jump(&prog, insn->dst_reg, image + addrs[i - 1]);
|
||||
break;
|
||||
case BPF_JMP | BPF_JA:
|
||||
case BPF_JMP32 | BPF_JA:
|
||||
if (BPF_CLASS(insn->code) == BPF_JMP) {
|
||||
@@ -2830,9 +2850,10 @@ static int get_nr_used_regs(const struct btf_func_model *m)
|
||||
}
|
||||
|
||||
static void save_args(const struct btf_func_model *m, u8 **prog,
|
||||
int stack_size, bool for_call_origin)
|
||||
int stack_size, bool for_call_origin, u32 flags)
|
||||
{
|
||||
int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0;
|
||||
bool use_jmp = bpf_trampoline_use_jmp(flags);
|
||||
int i, j;
|
||||
|
||||
/* Store function arguments to stack.
|
||||
@@ -2873,7 +2894,7 @@ static void save_args(const struct btf_func_model *m, u8 **prog,
|
||||
*/
|
||||
for (j = 0; j < arg_regs; j++) {
|
||||
emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
|
||||
nr_stack_slots * 8 + 0x18);
|
||||
nr_stack_slots * 8 + 16 + (!use_jmp) * 8);
|
||||
emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
|
||||
-stack_size);
|
||||
|
||||
@@ -3267,12 +3288,17 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
|
||||
* should be 16-byte aligned. Following code depend on
|
||||
* that stack_size is already 8-byte aligned.
|
||||
*/
|
||||
stack_size += (stack_size % 16) ? 0 : 8;
|
||||
if (bpf_trampoline_use_jmp(flags)) {
|
||||
/* no rip in the "jmp" case */
|
||||
stack_size += (stack_size % 16) ? 8 : 0;
|
||||
} else {
|
||||
stack_size += (stack_size % 16) ? 0 : 8;
|
||||
}
|
||||
}
|
||||
|
||||
arg_stack_off = stack_size;
|
||||
|
||||
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
|
||||
if (flags & BPF_TRAMP_F_CALL_ORIG) {
|
||||
/* skip patched call instruction and point orig_call to actual
|
||||
* body of the kernel function.
|
||||
*/
|
||||
@@ -3327,7 +3353,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
|
||||
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
|
||||
}
|
||||
|
||||
save_args(m, &prog, regs_off, false);
|
||||
save_args(m, &prog, regs_off, false, flags);
|
||||
|
||||
if (flags & BPF_TRAMP_F_CALL_ORIG) {
|
||||
/* arg1: mov rdi, im */
|
||||
@@ -3360,7 +3386,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
|
||||
|
||||
if (flags & BPF_TRAMP_F_CALL_ORIG) {
|
||||
restore_regs(m, &prog, regs_off);
|
||||
save_args(m, &prog, arg_stack_off, true);
|
||||
save_args(m, &prog, arg_stack_off, true, flags);
|
||||
|
||||
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
|
||||
/* Before calling the original function, load the
|
||||
@@ -3543,7 +3569,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
emit_indirect_jump(&prog, 2 /* rdx */, image + (prog - buf));
|
||||
emit_indirect_jump(&prog, BPF_REG_3 /* R3 -> rdx */, image + (prog - buf));
|
||||
|
||||
*pprog = prog;
|
||||
return 0;
|
||||
@@ -3827,6 +3853,15 @@ out_image:
|
||||
jit_data->header = header;
|
||||
jit_data->rw_header = rw_header;
|
||||
}
|
||||
|
||||
/*
|
||||
* The bpf_prog_update_insn_ptrs function expects addrs to
|
||||
* point to the first byte of the jitted instruction (unlike
|
||||
* the bpf_prog_fill_jited_linfo below, which, for historical
|
||||
* reasons, expects to point to the next instruction)
|
||||
*/
|
||||
bpf_prog_update_insn_ptrs(prog, addrs, image);
|
||||
|
||||
/*
|
||||
* ctx.prog_offset is used when CFI preambles put code *before*
|
||||
* the function. See emit_cfi(). For FineIBT specifically this code
|
||||
@@ -3953,6 +3988,7 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
|
||||
struct bpf_prog *new, struct bpf_prog *old)
|
||||
{
|
||||
u8 *old_addr, *new_addr, *old_bypass_addr;
|
||||
enum bpf_text_poke_type t;
|
||||
int ret;
|
||||
|
||||
old_bypass_addr = old ? NULL : poke->bypass_addr;
|
||||
@@ -3965,21 +4001,22 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
|
||||
* the kallsyms check.
|
||||
*/
|
||||
if (new) {
|
||||
t = old_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
|
||||
ret = __bpf_arch_text_poke(poke->tailcall_target,
|
||||
BPF_MOD_JUMP,
|
||||
t, BPF_MOD_JUMP,
|
||||
old_addr, new_addr);
|
||||
BUG_ON(ret < 0);
|
||||
if (!old) {
|
||||
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
|
||||
BPF_MOD_JUMP,
|
||||
BPF_MOD_JUMP, BPF_MOD_NOP,
|
||||
poke->bypass_addr,
|
||||
NULL);
|
||||
BUG_ON(ret < 0);
|
||||
}
|
||||
} else {
|
||||
t = old_bypass_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
|
||||
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
|
||||
BPF_MOD_JUMP,
|
||||
old_bypass_addr,
|
||||
t, BPF_MOD_JUMP, old_bypass_addr,
|
||||
poke->bypass_addr);
|
||||
BUG_ON(ret < 0);
|
||||
/* let other CPUs finish the execution of program
|
||||
@@ -3988,9 +4025,9 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
|
||||
*/
|
||||
if (!ret)
|
||||
synchronize_rcu();
|
||||
t = old_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
|
||||
ret = __bpf_arch_text_poke(poke->tailcall_target,
|
||||
BPF_MOD_JUMP,
|
||||
old_addr, NULL);
|
||||
t, BPF_MOD_NOP, old_addr, NULL);
|
||||
BUG_ON(ret < 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,8 +129,8 @@ dec:
|
||||
* <error> for lock B
|
||||
* release_held_lock_entry
|
||||
*
|
||||
* try_cmpxchg_acquire for lock A
|
||||
* grab_held_lock_entry
|
||||
* try_cmpxchg_acquire for lock A
|
||||
*
|
||||
* Lack of any ordering means reordering may occur such that dec, inc
|
||||
* are done before entry is overwritten. This permits a remote lock
|
||||
@@ -139,13 +139,8 @@ dec:
|
||||
* CPU holds a lock it is attempting to acquire, leading to false ABBA
|
||||
* diagnosis).
|
||||
*
|
||||
* In case of unlock, we will always do a release on the lock word after
|
||||
* releasing the entry, ensuring that other CPUs cannot hold the lock
|
||||
* (and make conclusions about deadlocks) until the entry has been
|
||||
* cleared on the local CPU, preventing any anomalies. Reordering is
|
||||
* still possible there, but a remote CPU cannot observe a lock in our
|
||||
* table which it is already holding, since visibility entails our
|
||||
* release store for the said lock has not retired.
|
||||
* The case of unlock is treated differently due to NMI reentrancy, see
|
||||
* comments in res_spin_unlock.
|
||||
*
|
||||
* In theory we don't have a problem if the dec and WRITE_ONCE above get
|
||||
* reordered with each other, we either notice an empty NULL entry on
|
||||
@@ -175,10 +170,22 @@ static __always_inline int res_spin_lock(rqspinlock_t *lock)
|
||||
{
|
||||
int val = 0;
|
||||
|
||||
if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) {
|
||||
grab_held_lock_entry(lock);
|
||||
/*
|
||||
* Grab the deadlock detection entry before doing the cmpxchg, so that
|
||||
* reentrancy due to NMIs between the succeeding cmpxchg and creation of
|
||||
* held lock entry can correctly detect an acquisition attempt in the
|
||||
* interrupted context.
|
||||
*
|
||||
* cmpxchg lock A
|
||||
* <NMI>
|
||||
* res_spin_lock(A) --> missed AA, leads to timeout
|
||||
* </NMI>
|
||||
* grab_held_lock_entry(A)
|
||||
*/
|
||||
grab_held_lock_entry(lock);
|
||||
|
||||
if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
|
||||
return 0;
|
||||
}
|
||||
return resilient_queued_spin_lock_slowpath(lock, val);
|
||||
}
|
||||
|
||||
@@ -192,28 +199,25 @@ static __always_inline void res_spin_unlock(rqspinlock_t *lock)
|
||||
{
|
||||
struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
|
||||
|
||||
if (unlikely(rqh->cnt > RES_NR_HELD))
|
||||
goto unlock;
|
||||
WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
|
||||
unlock:
|
||||
/*
|
||||
* Release barrier, ensures correct ordering. See release_held_lock_entry
|
||||
* for details. Perform release store instead of queued_spin_unlock,
|
||||
* since we use this function for test-and-set fallback as well. When we
|
||||
* have CONFIG_QUEUED_SPINLOCKS=n, we clear the full 4-byte lockword.
|
||||
* Release barrier, ensures correct ordering. Perform release store
|
||||
* instead of queued_spin_unlock, since we use this function for the TAS
|
||||
* fallback as well. When we have CONFIG_QUEUED_SPINLOCKS=n, we clear
|
||||
* the full 4-byte lockword.
|
||||
*
|
||||
* Like release_held_lock_entry, we can do the release before the dec.
|
||||
* We simply care about not seeing the 'lock' in our table from a remote
|
||||
* CPU once the lock has been released, which doesn't rely on the dec.
|
||||
* Perform the smp_store_release before clearing the lock entry so that
|
||||
* NMIs landing in the unlock path can correctly detect AA issues. The
|
||||
* opposite order shown below may lead to missed AA checks:
|
||||
*
|
||||
* Unlike smp_wmb(), release is not a two way fence, hence it is
|
||||
* possible for a inc to move up and reorder with our clearing of the
|
||||
* entry. This isn't a problem however, as for a misdiagnosis of ABBA,
|
||||
* the remote CPU needs to hold this lock, which won't be released until
|
||||
* the store below is done, which would ensure the entry is overwritten
|
||||
* to NULL, etc.
|
||||
* WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL)
|
||||
* <NMI>
|
||||
* res_spin_lock(A) --> missed AA, leads to timeout
|
||||
* </NMI>
|
||||
* smp_store_release(A->locked, 0)
|
||||
*/
|
||||
smp_store_release(&lock->locked, 0);
|
||||
if (likely(rqh->cnt <= RES_NR_HELD))
|
||||
WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
|
||||
this_cpu_dec(rqspinlock_held_locks.cnt);
|
||||
}
|
||||
|
||||
|
||||
@@ -663,6 +663,16 @@ int map_check_no_btf(const struct bpf_map *map,
|
||||
bool bpf_map_meta_equal(const struct bpf_map *meta0,
|
||||
const struct bpf_map *meta1);
|
||||
|
||||
static inline bool bpf_map_has_internal_structs(struct bpf_map *map)
|
||||
{
|
||||
return btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK);
|
||||
}
|
||||
|
||||
void bpf_map_free_internal_structs(struct bpf_map *map, void *obj);
|
||||
|
||||
int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
|
||||
struct bpf_dynptr *ptr__uninit);
|
||||
|
||||
extern const struct bpf_map_ops bpf_map_offload_ops;
|
||||
|
||||
/* bpf_type_flag contains a set of flags that are applicable to the values of
|
||||
@@ -785,12 +795,15 @@ enum bpf_type_flag {
|
||||
/* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
|
||||
DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),
|
||||
|
||||
/* DYNPTR points to file */
|
||||
DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS),
|
||||
|
||||
__BPF_TYPE_FLAG_MAX,
|
||||
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
|
||||
};
|
||||
|
||||
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
|
||||
| DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)
|
||||
| DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE)
|
||||
|
||||
/* Max number of base types. */
|
||||
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
|
||||
@@ -988,6 +1001,7 @@ enum bpf_reg_type {
|
||||
PTR_TO_ARENA,
|
||||
PTR_TO_BUF, /* reg points to a read/write buffer */
|
||||
PTR_TO_FUNC, /* reg points to a bpf program function */
|
||||
PTR_TO_INSN, /* reg points to a bpf program instruction */
|
||||
CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */
|
||||
__BPF_REG_TYPE_MAX,
|
||||
|
||||
@@ -1250,6 +1264,18 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
|
||||
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
|
||||
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
|
||||
static inline bool bpf_trampoline_use_jmp(u64 flags)
|
||||
{
|
||||
return flags & BPF_TRAMP_F_CALL_ORIG && !(flags & BPF_TRAMP_F_SKIP_FRAME);
|
||||
}
|
||||
#else
|
||||
static inline bool bpf_trampoline_use_jmp(u64 flags)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct bpf_ksym {
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
@@ -1378,21 +1404,23 @@ enum bpf_dynptr_type {
|
||||
BPF_DYNPTR_TYPE_XDP,
|
||||
/* Points to skb_metadata_end()-skb_metadata_len() */
|
||||
BPF_DYNPTR_TYPE_SKB_META,
|
||||
/* Underlying data is a file */
|
||||
BPF_DYNPTR_TYPE_FILE,
|
||||
};
|
||||
|
||||
int bpf_dynptr_check_size(u32 size);
|
||||
u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
|
||||
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
|
||||
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
|
||||
int bpf_dynptr_check_size(u64 size);
|
||||
u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
|
||||
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len);
|
||||
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len);
|
||||
bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
|
||||
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset,
|
||||
void *src, u32 len, u64 flags);
|
||||
void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
|
||||
void *buffer__opt, u32 buffer__szk);
|
||||
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset,
|
||||
void *src, u64 len, u64 flags);
|
||||
void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
|
||||
void *buffer__opt, u64 buffer__szk);
|
||||
|
||||
static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
|
||||
static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len)
|
||||
{
|
||||
u32 size = __bpf_dynptr_size(ptr);
|
||||
u64 size = __bpf_dynptr_size(ptr);
|
||||
|
||||
if (len > size || offset > size - len)
|
||||
return -E2BIG;
|
||||
@@ -1616,6 +1644,7 @@ struct bpf_prog_aux {
|
||||
u32 ctx_arg_info_size;
|
||||
u32 max_rdonly_access;
|
||||
u32 max_rdwr_access;
|
||||
u32 subprog_start;
|
||||
struct btf *attach_btf;
|
||||
struct bpf_ctx_arg_aux *ctx_arg_info;
|
||||
void __percpu *priv_stack_ptr;
|
||||
@@ -1905,12 +1934,14 @@ struct btf_member;
|
||||
* reason, if this callback is not defined, the check is skipped as
|
||||
* the struct_ops map will have final verification performed in
|
||||
* @reg.
|
||||
* @type: BTF type.
|
||||
* @value_type: Value type.
|
||||
* @cfi_stubs: Pointer to a structure of stub functions for CFI. These stubs
|
||||
* provide the correct Control Flow Integrity hashes for the
|
||||
* trampolines generated by BPF struct_ops.
|
||||
* @owner: The module that owns this struct_ops. Used for module reference
|
||||
* counting to ensure the module providing the struct_ops cannot be
|
||||
* unloaded while in use.
|
||||
* @name: The name of the struct bpf_struct_ops object.
|
||||
* @func_models: Func models
|
||||
* @type_id: BTF type id.
|
||||
* @value_id: BTF value id.
|
||||
*/
|
||||
struct bpf_struct_ops {
|
||||
const struct bpf_verifier_ops *verifier_ops;
|
||||
@@ -2099,6 +2130,12 @@ struct bpf_array {
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* The bpf_array_get_next_key() function may be used for all array-like
|
||||
* maps, i.e., maps with u32 keys with range [0 ,..., max_entries)
|
||||
*/
|
||||
int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key);
|
||||
|
||||
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
|
||||
#define MAX_TAIL_CALL_CNT 33
|
||||
|
||||
@@ -2374,6 +2411,9 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array *array,
|
||||
bool bpf_jit_bypass_spec_v1(void);
|
||||
bool bpf_jit_bypass_spec_v4(void);
|
||||
|
||||
#define bpf_rcu_lock_held() \
|
||||
(rcu_read_lock_held() || rcu_read_lock_trace_held() || rcu_read_lock_bh_held())
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
DECLARE_PER_CPU(int, bpf_prog_active);
|
||||
extern struct mutex bpf_stats_enabled_mutex;
|
||||
@@ -3670,12 +3710,14 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
|
||||
#endif /* CONFIG_INET */
|
||||
|
||||
enum bpf_text_poke_type {
|
||||
BPF_MOD_NOP,
|
||||
BPF_MOD_CALL,
|
||||
BPF_MOD_JUMP,
|
||||
};
|
||||
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
||||
void *addr1, void *addr2);
|
||||
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
|
||||
enum bpf_text_poke_type new_t, void *old_addr,
|
||||
void *new_addr);
|
||||
|
||||
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
|
||||
struct bpf_prog *new, struct bpf_prog *old);
|
||||
@@ -3772,4 +3814,30 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
|
||||
const char **linep, int *nump);
|
||||
struct bpf_prog *bpf_prog_find_from_stack(void);
|
||||
|
||||
int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog);
|
||||
int bpf_insn_array_ready(struct bpf_map *map);
|
||||
void bpf_insn_array_release(struct bpf_map *map);
|
||||
void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len);
|
||||
void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len);
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image);
|
||||
#else
|
||||
static inline void
|
||||
bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags)
|
||||
{
|
||||
if (flags & ~allowed_flags)
|
||||
return -EINVAL;
|
||||
|
||||
if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* _LINUX_BPF_H */
|
||||
|
||||
@@ -18,9 +18,6 @@
|
||||
|
||||
#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
|
||||
|
||||
#define bpf_rcu_lock_held() \
|
||||
(rcu_read_lock_held() || rcu_read_lock_trace_held() || \
|
||||
rcu_read_lock_bh_held())
|
||||
struct bpf_local_storage_map_bucket {
|
||||
struct hlist_head list;
|
||||
raw_spinlock_t lock;
|
||||
@@ -56,9 +53,7 @@ struct bpf_local_storage_map {
|
||||
u32 bucket_log;
|
||||
u16 elem_size;
|
||||
u16 cache_idx;
|
||||
struct bpf_mem_alloc selem_ma;
|
||||
struct bpf_mem_alloc storage_ma;
|
||||
bool bpf_ma;
|
||||
bool use_kmalloc_nolock;
|
||||
};
|
||||
|
||||
struct bpf_local_storage_data {
|
||||
@@ -100,6 +95,7 @@ struct bpf_local_storage {
|
||||
*/
|
||||
struct rcu_head rcu;
|
||||
raw_spinlock_t lock; /* Protect adding/removing from the "list" */
|
||||
bool use_kmalloc_nolock;
|
||||
};
|
||||
|
||||
/* U16_MAX is much more than enough for sk local storage
|
||||
@@ -133,7 +129,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
|
||||
struct bpf_map *
|
||||
bpf_local_storage_map_alloc(union bpf_attr *attr,
|
||||
struct bpf_local_storage_cache *cache,
|
||||
bool bpf_ma);
|
||||
bool use_kmalloc_nolock);
|
||||
|
||||
void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
|
||||
struct bpf_local_storage_map *smap,
|
||||
@@ -187,10 +183,9 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
|
||||
|
||||
struct bpf_local_storage_elem *
|
||||
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
|
||||
bool charge_mem, bool swap_uptrs, gfp_t gfp_flags);
|
||||
bool swap_uptrs, gfp_t gfp_flags);
|
||||
|
||||
void bpf_selem_free(struct bpf_local_storage_elem *selem,
|
||||
struct bpf_local_storage_map *smap,
|
||||
bool reuse_now);
|
||||
|
||||
int
|
||||
|
||||
@@ -133,6 +133,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
|
||||
BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
|
||||
BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
|
||||
BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops)
|
||||
BPF_MAP_TYPE(BPF_MAP_TYPE_INSN_ARRAY, insn_array_map_ops)
|
||||
|
||||
BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
|
||||
BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
|
||||
|
||||
@@ -416,7 +416,7 @@ struct bpf_verifier_state {
|
||||
u32 active_irq_id;
|
||||
u32 active_lock_id;
|
||||
void *active_lock_ptr;
|
||||
bool active_rcu_lock;
|
||||
u32 active_rcu_locks;
|
||||
|
||||
bool speculative;
|
||||
bool in_sleepable;
|
||||
@@ -509,6 +509,15 @@ struct bpf_map_ptr_state {
|
||||
#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \
|
||||
BPF_ALU_SANITIZE_DST)
|
||||
|
||||
/*
|
||||
* An array of BPF instructions.
|
||||
* Primary usage: return value of bpf_insn_successors.
|
||||
*/
|
||||
struct bpf_iarray {
|
||||
int cnt;
|
||||
u32 items[];
|
||||
};
|
||||
|
||||
struct bpf_insn_aux_data {
|
||||
union {
|
||||
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
|
||||
@@ -540,6 +549,7 @@ struct bpf_insn_aux_data {
|
||||
/* remember the offset of node field within type to rewrite */
|
||||
u64 insert_off;
|
||||
};
|
||||
struct bpf_iarray *jt; /* jump table for gotox or bpf_tailcall call instruction */
|
||||
struct btf_struct_meta *kptr_struct_meta;
|
||||
u64 map_key_state; /* constant (32 bit) key tracking for maps */
|
||||
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
|
||||
@@ -548,7 +558,7 @@ struct bpf_insn_aux_data {
|
||||
bool nospec_result; /* result is unsafe under speculation, nospec must follow */
|
||||
bool zext_dst; /* this insn zero extends dst reg */
|
||||
bool needs_zext; /* alu op needs to clear upper bits */
|
||||
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
|
||||
bool non_sleepable; /* helper/kfunc may be called from non-sleepable context */
|
||||
bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
|
||||
bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
|
||||
u8 alu_state; /* used in combination with alu_limit */
|
||||
@@ -642,6 +652,7 @@ struct bpf_subprog_info {
|
||||
u32 start; /* insn idx of function entry point */
|
||||
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
|
||||
u32 postorder_start; /* The idx to the env->cfg.insn_postorder */
|
||||
u32 exit_idx; /* Index of one of the BPF_EXIT instructions in this subprogram */
|
||||
u16 stack_depth; /* max. stack depth used by this function */
|
||||
u16 stack_extra;
|
||||
/* offsets in range [stack_depth .. fastcall_stack_off)
|
||||
@@ -659,9 +670,9 @@ struct bpf_subprog_info {
|
||||
bool keep_fastcall_stack: 1;
|
||||
bool changes_pkt_data: 1;
|
||||
bool might_sleep: 1;
|
||||
u8 arg_cnt:3;
|
||||
|
||||
enum priv_stack_mode priv_stack_mode;
|
||||
u8 arg_cnt;
|
||||
struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
|
||||
};
|
||||
|
||||
@@ -745,8 +756,10 @@ struct bpf_verifier_env {
|
||||
struct list_head free_list; /* list of struct bpf_verifier_state_list */
|
||||
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
|
||||
struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */
|
||||
struct bpf_map *insn_array_maps[MAX_USED_MAPS]; /* array of INSN_ARRAY map's to be relocated */
|
||||
u32 used_map_cnt; /* number of used maps */
|
||||
u32 used_btf_cnt; /* number of used BTF objects */
|
||||
u32 insn_array_map_cnt; /* number of used maps of type BPF_MAP_TYPE_INSN_ARRAY */
|
||||
u32 id_gen; /* used to generate unique reg IDs */
|
||||
u32 hidden_subprog_cnt; /* number of hidden subprogs */
|
||||
int exception_callback_subprog;
|
||||
@@ -828,6 +841,8 @@ struct bpf_verifier_env {
|
||||
/* array of pointers to bpf_scc_info indexed by SCC id */
|
||||
struct bpf_scc_info **scc_info;
|
||||
u32 scc_cnt;
|
||||
struct bpf_iarray *succ;
|
||||
struct bpf_iarray *gotox_tmp_buf;
|
||||
};
|
||||
|
||||
static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog)
|
||||
@@ -1038,6 +1053,13 @@ static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_
|
||||
return !(off % BPF_REG_SIZE);
|
||||
}
|
||||
|
||||
static inline bool insn_is_gotox(struct bpf_insn *insn)
|
||||
{
|
||||
return BPF_CLASS(insn->code) == BPF_JMP &&
|
||||
BPF_OP(insn->code) == BPF_JA &&
|
||||
BPF_SRC(insn->code) == BPF_X;
|
||||
}
|
||||
|
||||
const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type);
|
||||
const char *dynptr_type_str(enum bpf_dynptr_type type);
|
||||
const char *iter_type_str(const struct btf *btf, u32 btf_id);
|
||||
@@ -1050,7 +1072,7 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st
|
||||
|
||||
struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off);
|
||||
int bpf_jmp_offset(struct bpf_insn *insn);
|
||||
int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]);
|
||||
struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx);
|
||||
void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask);
|
||||
bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx);
|
||||
|
||||
|
||||
@@ -18,4 +18,29 @@ void init_vmlinux_build_id(void);
|
||||
static inline void init_vmlinux_build_id(void) { }
|
||||
#endif
|
||||
|
||||
struct freader {
|
||||
void *buf;
|
||||
u32 buf_sz;
|
||||
int err;
|
||||
union {
|
||||
struct {
|
||||
struct file *file;
|
||||
struct folio *folio;
|
||||
void *addr;
|
||||
loff_t folio_off;
|
||||
bool may_fault;
|
||||
};
|
||||
struct {
|
||||
const char *data;
|
||||
u64 data_sz;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
|
||||
struct file *file, bool may_fault);
|
||||
void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz);
|
||||
const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz);
|
||||
void freader_cleanup(struct freader *r);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -712,11 +712,13 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
|
||||
ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
|
||||
|
||||
duration = sched_clock() - start;
|
||||
stats = this_cpu_ptr(prog->stats);
|
||||
flags = u64_stats_update_begin_irqsave(&stats->syncp);
|
||||
u64_stats_inc(&stats->cnt);
|
||||
u64_stats_add(&stats->nsecs, duration);
|
||||
u64_stats_update_end_irqrestore(&stats->syncp, flags);
|
||||
if (likely(prog->stats)) {
|
||||
stats = this_cpu_ptr(prog->stats);
|
||||
flags = u64_stats_update_begin_irqsave(&stats->syncp);
|
||||
u64_stats_inc(&stats->cnt);
|
||||
u64_stats_add(&stats->nsecs, duration);
|
||||
u64_stats_update_end_irqrestore(&stats->syncp, flags);
|
||||
}
|
||||
} else {
|
||||
ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
|
||||
}
|
||||
|
||||
@@ -359,6 +359,7 @@ enum {
|
||||
FTRACE_OPS_FL_DIRECT = BIT(17),
|
||||
FTRACE_OPS_FL_SUBOP = BIT(18),
|
||||
FTRACE_OPS_FL_GRAPH = BIT(19),
|
||||
FTRACE_OPS_FL_JMP = BIT(20),
|
||||
};
|
||||
|
||||
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
|
||||
@@ -577,6 +578,38 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
|
||||
unsigned long addr) { }
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
|
||||
static inline bool ftrace_is_jmp(unsigned long addr)
|
||||
{
|
||||
return addr & 1;
|
||||
}
|
||||
|
||||
static inline unsigned long ftrace_jmp_set(unsigned long addr)
|
||||
{
|
||||
return addr | 1UL;
|
||||
}
|
||||
|
||||
static inline unsigned long ftrace_jmp_get(unsigned long addr)
|
||||
{
|
||||
return addr & ~1UL;
|
||||
}
|
||||
#else
|
||||
static inline bool ftrace_is_jmp(unsigned long addr)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline unsigned long ftrace_jmp_set(unsigned long addr)
|
||||
{
|
||||
return addr;
|
||||
}
|
||||
|
||||
static inline unsigned long ftrace_jmp_get(unsigned long addr)
|
||||
{
|
||||
return addr;
|
||||
}
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_JMP */
|
||||
|
||||
#ifdef CONFIG_STACK_TRACER
|
||||
|
||||
int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
|
||||
|
||||
@@ -1026,6 +1026,7 @@ enum bpf_map_type {
|
||||
BPF_MAP_TYPE_USER_RINGBUF,
|
||||
BPF_MAP_TYPE_CGRP_STORAGE,
|
||||
BPF_MAP_TYPE_ARENA,
|
||||
BPF_MAP_TYPE_INSN_ARRAY,
|
||||
__MAX_BPF_MAP_TYPE
|
||||
};
|
||||
|
||||
@@ -1430,6 +1431,9 @@ enum {
|
||||
|
||||
/* Do not translate kernel bpf_arena pointers to user pointers */
|
||||
BPF_F_NO_USER_CONV = (1U << 18),
|
||||
|
||||
/* Enable BPF ringbuf overwrite mode */
|
||||
BPF_F_RB_OVERWRITE = (1U << 19),
|
||||
};
|
||||
|
||||
/* Flags for BPF_PROG_QUERY. */
|
||||
@@ -5618,7 +5622,7 @@ union bpf_attr {
|
||||
* Return
|
||||
* *sk* if casting is valid, or **NULL** otherwise.
|
||||
*
|
||||
* long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
|
||||
* long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
|
||||
* Description
|
||||
* Get a dynptr to local memory *data*.
|
||||
*
|
||||
@@ -5661,7 +5665,7 @@ union bpf_attr {
|
||||
* Return
|
||||
* Nothing. Always succeeds.
|
||||
*
|
||||
* long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
|
||||
* long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
|
||||
* Description
|
||||
* Read *len* bytes from *src* into *dst*, starting from *offset*
|
||||
* into *src*.
|
||||
@@ -5671,7 +5675,7 @@ union bpf_attr {
|
||||
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
|
||||
* *flags* is not 0.
|
||||
*
|
||||
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
|
||||
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
|
||||
* Description
|
||||
* Write *len* bytes from *src* into *dst*, starting from *offset*
|
||||
* into *dst*.
|
||||
@@ -5692,7 +5696,7 @@ union bpf_attr {
|
||||
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
|
||||
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
|
||||
*
|
||||
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
|
||||
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
|
||||
* Description
|
||||
* Get a pointer to the underlying dynptr data.
|
||||
*
|
||||
@@ -6231,6 +6235,7 @@ enum {
|
||||
BPF_RB_RING_SIZE = 1,
|
||||
BPF_RB_CONS_POS = 2,
|
||||
BPF_RB_PROD_POS = 3,
|
||||
BPF_RB_OVERWRITE_POS = 4,
|
||||
};
|
||||
|
||||
/* BPF ring buffer constants */
|
||||
@@ -7645,4 +7650,24 @@ enum bpf_kfunc_flags {
|
||||
BPF_F_PAD_ZEROS = (1ULL << 0),
|
||||
};
|
||||
|
||||
/*
|
||||
* Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type.
|
||||
*
|
||||
* Before the map is used the orig_off field should point to an
|
||||
* instruction inside the program being loaded. The other fields
|
||||
* must be set to 0.
|
||||
*
|
||||
* After the program is loaded, the xlated_off will be adjusted
|
||||
* by the verifier to point to the index of the original instruction
|
||||
* in the xlated program. If the instruction is deleted, it will
|
||||
* be set to (u32)-1. The jitted_off will be set to the corresponding
|
||||
* offset in the jitted image of the program.
|
||||
*/
|
||||
struct bpf_insn_array_value {
|
||||
__u32 orig_off;
|
||||
__u32 xlated_off;
|
||||
__u32 jitted_off;
|
||||
__u32 :32;
|
||||
};
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
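As an aside, not from the patch: a hypothetical user-space sketch of creating and seeding a BPF_MAP_TYPE_INSN_ARRAY map, following the constraints enforced by this series (key_size of 4, value_size of sizeof(struct bpf_insn_array_value), no map flags). The function name and the chosen instruction offset are illustrative; loading the program and full error handling are omitted.

    #include <linux/bpf.h>
    #include <bpf/bpf.h>

    static int create_insn_array(void)
    {
            struct bpf_insn_array_value val = {};
            __u32 key = 0;
            int map_fd;

            /* key_size must be 4 and value_size must match the uapi struct above */
            map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, "insn_array",
                                    sizeof(__u32), sizeof(val), 1, NULL);
            if (map_fd < 0)
                    return map_fd;

            /* Point entry 0 at instruction 5 of the program about to be loaded;
             * the verifier later fills in xlated_off and jitted_off.
             */
            val.orig_off = 5;
            if (bpf_map_update_elem(map_fd, &key, &val, BPF_ANY))
                    return -1;

            return map_fd;
    }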
@@ -9,7 +9,7 @@ CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
|
||||
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
|
||||
|
||||
@@ -335,18 +335,17 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
|
||||
}
|
||||
|
||||
/* Called from syscall */
|
||||
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
{
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
u32 index = key ? *(u32 *)key : U32_MAX;
|
||||
u32 *next = (u32 *)next_key;
|
||||
|
||||
if (index >= array->map.max_entries) {
|
||||
if (index >= map->max_entries) {
|
||||
*next = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (index == array->map.max_entries - 1)
|
||||
if (index == map->max_entries - 1)
|
||||
return -ENOENT;
|
||||
|
||||
*next = index + 1;
|
||||
@@ -448,19 +447,12 @@ static void array_map_free_internal_structs(struct bpf_map *map)
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
int i;
|
||||
|
||||
/* We don't reset or free fields other than timer and workqueue
|
||||
* on uref dropping to zero.
|
||||
*/
|
||||
if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
|
||||
for (i = 0; i < array->map.max_entries; i++) {
|
||||
if (btf_record_has_field(map->record, BPF_TIMER))
|
||||
bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
|
||||
if (btf_record_has_field(map->record, BPF_WORKQUEUE))
|
||||
bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
|
||||
if (btf_record_has_field(map->record, BPF_TASK_WORK))
|
||||
bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
|
||||
}
|
||||
}
|
||||
/* We only free internal structs on uref dropping to zero */
|
||||
if (!bpf_map_has_internal_structs(map))
|
||||
return;
|
||||
|
||||
for (i = 0; i < array->map.max_entries; i++)
|
||||
bpf_map_free_internal_structs(map, array_map_elem_ptr(array, i));
|
||||
}
|
||||
|
||||
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
|
||||
@@ -796,7 +788,7 @@ const struct bpf_map_ops array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
-	.map_get_next_key = array_map_get_next_key,
+	.map_get_next_key = bpf_array_get_next_key,
	.map_release_uref = array_map_free_internal_structs,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,

@@ -822,7 +814,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
-	.map_get_next_key = array_map_get_next_key,
+	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_gen_lookup = percpu_array_map_gen_lookup,
	.map_update_elem = array_map_update_elem,

@@ -1211,7 +1203,7 @@ const struct bpf_map_ops prog_array_map_ops = {
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
-	.map_get_next_key = array_map_get_next_key,
+	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,

@@ -1315,7 +1307,7 @@ const struct bpf_map_ops perf_event_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
-	.map_get_next_key = array_map_get_next_key,
+	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,

@@ -1351,7 +1343,7 @@ const struct bpf_map_ops cgroup_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
-	.map_get_next_key = array_map_get_next_key,
+	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,

@@ -1436,7 +1428,7 @@ const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
-	.map_get_next_key = array_map_get_next_key,
+	.map_get_next_key = bpf_array_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,

kernel/bpf/bpf_insn_array.c (new file, 304 lines)
@@ -0,0 +1,304 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (c) 2025 Isovalent */
|
||||
|
||||
#include <linux/bpf.h>
|
||||
|
||||
struct bpf_insn_array {
|
||||
struct bpf_map map;
|
||||
atomic_t used;
|
||||
long *ips;
|
||||
DECLARE_FLEX_ARRAY(struct bpf_insn_array_value, values);
|
||||
};
|
||||
|
||||
#define cast_insn_array(MAP_PTR) \
|
||||
container_of((MAP_PTR), struct bpf_insn_array, map)
|
||||
|
||||
#define INSN_DELETED ((u32)-1)
|
||||
|
||||
static inline u64 insn_array_alloc_size(u32 max_entries)
|
||||
{
|
||||
const u64 base_size = sizeof(struct bpf_insn_array);
|
||||
const u64 entry_size = sizeof(struct bpf_insn_array_value);
|
||||
|
||||
return base_size + max_entries * (entry_size + sizeof(long));
|
||||
}
|
||||
|
||||
static int insn_array_alloc_check(union bpf_attr *attr)
|
||||
{
|
||||
u32 value_size = sizeof(struct bpf_insn_array_value);
|
||||
|
||||
if (attr->max_entries == 0 || attr->key_size != 4 ||
|
||||
attr->value_size != value_size || attr->map_flags != 0)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void insn_array_free(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_insn_array *insn_array = cast_insn_array(map);
|
||||
|
||||
bpf_map_area_free(insn_array);
|
||||
}
|
||||
|
||||
static struct bpf_map *insn_array_alloc(union bpf_attr *attr)
|
||||
{
|
||||
u64 size = insn_array_alloc_size(attr->max_entries);
|
||||
struct bpf_insn_array *insn_array;
|
||||
|
||||
insn_array = bpf_map_area_alloc(size, NUMA_NO_NODE);
|
||||
if (!insn_array)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* ips are allocated right after the insn_array->values[] array */
|
||||
insn_array->ips = (void *)&insn_array->values[attr->max_entries];
|
||||
|
||||
bpf_map_init_from_attr(&insn_array->map, attr);
|
||||
|
||||
/* BPF programs aren't allowed to write to the map */
|
||||
insn_array->map.map_flags |= BPF_F_RDONLY_PROG;
|
||||
|
||||
return &insn_array->map;
|
||||
}
|
||||
|
||||
static void *insn_array_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct bpf_insn_array *insn_array = cast_insn_array(map);
|
||||
u32 index = *(u32 *)key;
|
||||
|
||||
if (unlikely(index >= insn_array->map.max_entries))
|
||||
return NULL;
|
||||
|
||||
return &insn_array->values[index];
|
||||
}
|
||||
|
||||
static long insn_array_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags)
|
||||
{
|
||||
struct bpf_insn_array *insn_array = cast_insn_array(map);
|
||||
u32 index = *(u32 *)key;
|
||||
struct bpf_insn_array_value val = {};
|
||||
|
||||
if (unlikely(index >= insn_array->map.max_entries))
|
||||
return -E2BIG;
|
||||
|
||||
if (unlikely(map_flags & BPF_NOEXIST))
|
||||
return -EEXIST;
|
||||
|
||||
copy_map_value(map, &val, value);
|
||||
if (val.jitted_off || val.xlated_off)
|
||||
return -EINVAL;
|
||||
|
||||
insn_array->values[index].orig_off = val.orig_off;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long insn_array_delete_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int insn_array_check_btf(const struct bpf_map *map,
|
||||
const struct btf *btf,
|
||||
const struct btf_type *key_type,
|
||||
const struct btf_type *value_type)
|
||||
{
|
||||
if (!btf_type_is_i32(key_type))
|
||||
return -EINVAL;
|
||||
|
||||
if (!btf_type_is_i64(value_type))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 insn_array_mem_usage(const struct bpf_map *map)
|
||||
{
|
||||
return insn_array_alloc_size(map->max_entries);
|
||||
}
|
||||
|
||||
static int insn_array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 off)
|
||||
{
|
||||
struct bpf_insn_array *insn_array = cast_insn_array(map);
|
||||
|
||||
if ((off % sizeof(long)) != 0 ||
|
||||
(off / sizeof(long)) >= map->max_entries)
|
||||
return -EINVAL;
|
||||
|
||||
/* from BPF's point of view, this map is a jump table */
|
||||
*imm = (unsigned long)insn_array->ips + off;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
BTF_ID_LIST_SINGLE(insn_array_btf_ids, struct, bpf_insn_array)
|
||||
|
||||
const struct bpf_map_ops insn_array_map_ops = {
|
||||
.map_alloc_check = insn_array_alloc_check,
|
||||
.map_alloc = insn_array_alloc,
|
||||
.map_free = insn_array_free,
|
||||
.map_get_next_key = bpf_array_get_next_key,
|
||||
.map_lookup_elem = insn_array_lookup_elem,
|
||||
.map_update_elem = insn_array_update_elem,
|
||||
.map_delete_elem = insn_array_delete_elem,
|
||||
.map_check_btf = insn_array_check_btf,
|
||||
.map_mem_usage = insn_array_mem_usage,
|
||||
.map_direct_value_addr = insn_array_map_direct_value_addr,
|
||||
.map_btf_id = &insn_array_btf_ids[0],
};
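/*
 * A minimal user-space sketch (illustrative only; it assumes the standard
 * libbpf wrappers bpf_map_create()/bpf_map_update_elem()/bpf_map_freeze(),
 * and the map name and sizes below are made up) showing how a map satisfies
 * insn_array_alloc_check() and insn_array_update_elem() above, and why it
 * must be frozen before the program can use it (see is_frozen() below):
 *
 *	struct bpf_insn_array_value val = {};
 *	__u32 key = 0;
 *	int fd;
 *
 *	fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, "jump_table",
 *			    sizeof(__u32),                       (key_size must be 4)
 *			    sizeof(struct bpf_insn_array_value), (fixed value size)
 *			    4, NULL);                            (max_entries > 0, flags 0)
 *
 *	val.orig_off = 42;      (xlated_off and jitted_off must be left at 0)
 *	bpf_map_update_elem(fd, &key, &val, 0);     (BPF_NOEXIST is rejected)
 *	bpf_map_freeze(fd);     (bpf_insn_array_init() rejects unfrozen maps)
 */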
|
||||
|
||||
static inline bool is_frozen(struct bpf_map *map)
|
||||
{
|
||||
guard(mutex)(&map->freeze_mutex);
|
||||
|
||||
return map->frozen;
|
||||
}
|
||||
|
||||
static bool is_insn_array(const struct bpf_map *map)
|
||||
{
|
||||
return map->map_type == BPF_MAP_TYPE_INSN_ARRAY;
|
||||
}
|
||||
|
||||
static inline bool valid_offsets(const struct bpf_insn_array *insn_array,
|
||||
const struct bpf_prog *prog)
|
||||
{
|
||||
u32 off;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < insn_array->map.max_entries; i++) {
|
||||
off = insn_array->values[i].orig_off;
|
||||
|
||||
if (off >= prog->len)
|
||||
return false;
|
||||
|
||||
if (off > 0) {
|
||||
if (prog->insnsi[off-1].code == (BPF_LD | BPF_DW | BPF_IMM))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
}
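/*
 * Illustrative note on the check above: an entry is rejected both when
 * orig_off points past the last instruction and when it lands on the second
 * half of a 16-byte BPF_LD | BPF_DW | BPF_IMM (ldimm64) instruction, i.e.
 * when the preceding instruction opens an ldimm64 pair; jumping into the
 * middle of such a pair would split the 64-bit immediate load.
 */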
|
||||
|
||||
int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog)
|
||||
{
|
||||
struct bpf_insn_array *insn_array = cast_insn_array(map);
|
||||
struct bpf_insn_array_value *values = insn_array->values;
|
||||
int i;
|
||||
|
||||
if (!is_frozen(map))
|
||||
return -EINVAL;
|
||||
|
||||
if (!valid_offsets(insn_array, prog))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* There can be only one program using the map
|
||||
*/
|
||||
if (atomic_xchg(&insn_array->used, 1))
|
||||
return -EBUSY;
|
||||
|
||||
/*
|
||||
* Reset all the map indexes to the original values. This is needed,
|
||||
* e.g., when a replay of verification with different log level should
|
||||
* be performed.
|
||||
*/
|
||||
for (i = 0; i < map->max_entries; i++)
|
||||
values[i].xlated_off = values[i].orig_off;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bpf_insn_array_ready(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_insn_array *insn_array = cast_insn_array(map);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < map->max_entries; i++) {
|
||||
if (insn_array->values[i].xlated_off == INSN_DELETED)
|
||||
continue;
|
||||
if (!insn_array->ips[i])
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bpf_insn_array_release(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_insn_array *insn_array = cast_insn_array(map);
|
||||
|
||||
atomic_set(&insn_array->used, 0);
|
||||
}
|
||||
|
||||
void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len)
|
||||
{
|
||||
struct bpf_insn_array *insn_array = cast_insn_array(map);
|
||||
int i;
|
||||
|
||||
if (len <= 1)
|
||||
return;
|
||||
|
||||
for (i = 0; i < map->max_entries; i++) {
|
||||
if (insn_array->values[i].xlated_off <= off)
|
||||
continue;
|
||||
if (insn_array->values[i].xlated_off == INSN_DELETED)
|
||||
continue;
|
||||
insn_array->values[i].xlated_off += len - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len)
|
||||
{
|
||||
struct bpf_insn_array *insn_array = cast_insn_array(map);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < map->max_entries; i++) {
|
||||
if (insn_array->values[i].xlated_off < off)
|
||||
continue;
|
||||
if (insn_array->values[i].xlated_off == INSN_DELETED)
|
||||
continue;
|
||||
if (insn_array->values[i].xlated_off < off + len)
|
||||
insn_array->values[i].xlated_off = INSN_DELETED;
|
||||
else
|
||||
insn_array->values[i].xlated_off -= len;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called by JITs. The image is the real program
|
||||
* image, the offsets array set up the xlated -> jitted mapping.
|
||||
* The offsets[xlated] offset should point to the beginning of
|
||||
* the jitted instruction.
|
||||
*/
|
||||
void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
|
||||
{
|
||||
struct bpf_insn_array *insn_array;
|
||||
struct bpf_map *map;
|
||||
u32 xlated_off;
|
||||
int i, j;
|
||||
|
||||
if (!offsets || !image)
|
||||
return;
|
||||
|
||||
for (i = 0; i < prog->aux->used_map_cnt; i++) {
|
||||
map = prog->aux->used_maps[i];
|
||||
if (!is_insn_array(map))
|
||||
continue;
|
||||
|
||||
insn_array = cast_insn_array(map);
|
||||
for (j = 0; j < map->max_entries; j++) {
|
||||
xlated_off = insn_array->values[j].xlated_off;
|
||||
if (xlated_off == INSN_DELETED)
|
||||
continue;
|
||||
if (xlated_off < prog->aux->subprog_start)
|
||||
continue;
|
||||
xlated_off -= prog->aux->subprog_start;
|
||||
if (xlated_off >= prog->len)
|
||||
continue;
|
||||
|
||||
insn_array->values[j].jitted_off = offsets[xlated_off];
|
||||
insn_array->ips[j] = (long)(image + offsets[xlated_off]);
|
||||
}
|
||||
}
}
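/*
 * Worked example for the two adjust helpers above (illustrative only):
 * take a map entry with xlated_off == 10.
 *
 *   bpf_insn_array_adjust(map, 5, 3): instruction 5 was rewritten into a
 *   3-insn sequence, so every tracked offset strictly greater than 5 grows
 *   by len - 1 and the entry becomes 10 + 2 = 12.
 *
 *   bpf_insn_array_adjust_after_remove(map, 8, 4): instructions [8, 12)
 *   were removed, so an entry at 10 falls inside the window and is marked
 *   INSN_DELETED, while an entry at 14 would shrink to 14 - 4 = 10.
 */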
|
||||
@@ -73,30 +73,24 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
|
||||
|
||||
struct bpf_local_storage_elem *
|
||||
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
|
||||
void *value, bool charge_mem, bool swap_uptrs, gfp_t gfp_flags)
|
||||
void *value, bool swap_uptrs, gfp_t gfp_flags)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
|
||||
if (charge_mem && mem_charge(smap, owner, smap->elem_size))
|
||||
if (mem_charge(smap, owner, smap->elem_size))
|
||||
return NULL;
|
||||
|
||||
if (smap->bpf_ma) {
|
||||
selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags);
|
||||
if (selem)
|
||||
/* Keep the original bpf_map_kzalloc behavior
|
||||
* before started using the bpf_mem_cache_alloc.
|
||||
*
|
||||
* No need to use zero_map_value. The bpf_selem_free()
|
||||
* only does bpf_mem_cache_free when there is
|
||||
* no other bpf prog is using the selem.
|
||||
*/
|
||||
memset(SDATA(selem)->data, 0, smap->map.value_size);
|
||||
if (smap->use_kmalloc_nolock) {
|
||||
selem = bpf_map_kmalloc_nolock(&smap->map, smap->elem_size,
|
||||
__GFP_ZERO, NUMA_NO_NODE);
|
||||
} else {
|
||||
selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
|
||||
gfp_flags | __GFP_NOWARN);
|
||||
}
|
||||
|
||||
if (selem) {
|
||||
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
|
||||
|
||||
if (value) {
|
||||
/* No need to call check_and_init_map_value as memory is zero init */
|
||||
copy_map_value(&smap->map, SDATA(selem)->data, value);
|
||||
@@ -106,13 +100,12 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
|
||||
return selem;
|
||||
}
|
||||
|
||||
if (charge_mem)
|
||||
mem_uncharge(smap, owner, smap->elem_size);
|
||||
mem_uncharge(smap, owner, smap->elem_size);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* rcu tasks trace callback for bpf_ma == false */
|
||||
/* rcu tasks trace callback for use_kmalloc_nolock == false */
|
||||
static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_local_storage *local_storage;
|
||||
@@ -127,23 +120,7 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
|
||||
kfree_rcu(local_storage, rcu);
|
||||
}
|
||||
|
||||
static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_local_storage *local_storage;
|
||||
|
||||
local_storage = container_of(rcu, struct bpf_local_storage, rcu);
|
||||
bpf_mem_cache_raw_free(local_storage);
|
||||
}
|
||||
|
||||
static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
if (rcu_trace_implies_rcu_gp())
|
||||
bpf_local_storage_free_rcu(rcu);
|
||||
else
|
||||
call_rcu(rcu, bpf_local_storage_free_rcu);
|
||||
}
|
||||
|
||||
/* Handle bpf_ma == false */
|
||||
/* Handle use_kmalloc_nolock == false */
|
||||
static void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
|
||||
bool vanilla_rcu)
|
||||
{
|
||||
@@ -154,35 +131,43 @@ static void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
|
||||
__bpf_local_storage_free_trace_rcu);
|
||||
}
|
||||
|
||||
static void bpf_local_storage_free_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_local_storage *local_storage;
|
||||
|
||||
local_storage = container_of(rcu, struct bpf_local_storage, rcu);
|
||||
kfree_nolock(local_storage);
|
||||
}
|
||||
|
||||
static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
if (rcu_trace_implies_rcu_gp())
|
||||
bpf_local_storage_free_rcu(rcu);
|
||||
else
|
||||
call_rcu(rcu, bpf_local_storage_free_rcu);
|
||||
}
|
||||
|
||||
static void bpf_local_storage_free(struct bpf_local_storage *local_storage,
|
||||
struct bpf_local_storage_map *smap,
|
||||
bool bpf_ma, bool reuse_now)
|
||||
bool reuse_now)
|
||||
{
|
||||
if (!local_storage)
|
||||
return;
|
||||
|
||||
if (!bpf_ma) {
|
||||
if (!local_storage->use_kmalloc_nolock) {
|
||||
__bpf_local_storage_free(local_storage, reuse_now);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!reuse_now) {
|
||||
call_rcu_tasks_trace(&local_storage->rcu,
|
||||
bpf_local_storage_free_trace_rcu);
|
||||
if (reuse_now) {
|
||||
call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
|
||||
return;
|
||||
}
|
||||
|
||||
if (smap)
|
||||
bpf_mem_cache_free(&smap->storage_ma, local_storage);
|
||||
else
|
||||
/* smap could be NULL if the selem that triggered
|
||||
* this 'local_storage' creation had been long gone.
|
||||
* In this case, directly do call_rcu().
|
||||
*/
|
||||
call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
|
||||
call_rcu_tasks_trace(&local_storage->rcu,
|
||||
bpf_local_storage_free_trace_rcu);
|
||||
}
|
||||
|
||||
/* rcu tasks trace callback for bpf_ma == false */
|
||||
/* rcu tasks trace callback for use_kmalloc_nolock == false */
|
||||
static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
@@ -194,7 +179,7 @@ static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
|
||||
kfree_rcu(selem, rcu);
|
||||
}
|
||||
|
||||
/* Handle bpf_ma == false */
|
||||
/* Handle use_kmalloc_nolock == false */
|
||||
static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
|
||||
bool vanilla_rcu)
|
||||
{
|
||||
@@ -216,7 +201,7 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
|
||||
migrate_disable();
|
||||
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
|
||||
migrate_enable();
|
||||
bpf_mem_cache_raw_free(selem);
|
||||
kfree_nolock(selem);
|
||||
}
|
||||
|
||||
static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
|
||||
@@ -228,14 +213,17 @@ static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
|
||||
}
|
||||
|
||||
void bpf_selem_free(struct bpf_local_storage_elem *selem,
|
||||
struct bpf_local_storage_map *smap,
|
||||
bool reuse_now)
|
||||
{
|
||||
if (!smap->bpf_ma) {
|
||||
/* Only task storage has uptrs and task storage
|
||||
* has moved to bpf_mem_alloc. Meaning smap->bpf_ma == true
|
||||
* for task storage, so this bpf_obj_free_fields() won't unpin
|
||||
* any uptr.
|
||||
struct bpf_local_storage_map *smap;
|
||||
|
||||
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
|
||||
|
||||
if (!smap->use_kmalloc_nolock) {
|
||||
/*
|
||||
* No uptr will be unpin even when reuse_now == false since uptr
|
||||
* is only supported in task local storage, where
|
||||
* smap->use_kmalloc_nolock == true.
|
||||
*/
|
||||
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
|
||||
__bpf_selem_free(selem, reuse_now);
|
||||
@@ -243,18 +231,11 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
|
||||
}
|
||||
|
||||
if (reuse_now) {
|
||||
/* reuse_now == true only happens when the storage owner
|
||||
* (e.g. task_struct) is being destructed or the map itself
|
||||
* is being destructed (ie map_free). In both cases,
|
||||
* no bpf prog can have a hold on the selem. It is
|
||||
* safe to unpin the uptrs and free the selem now.
|
||||
/*
|
||||
* While it is okay to call bpf_obj_free_fields() that unpins uptr when
|
||||
* reuse_now == true, keep it in bpf_selem_free_rcu() for simplicity.
|
||||
*/
|
||||
bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
|
||||
/* Instead of using the vanilla call_rcu(),
|
||||
* bpf_mem_cache_free will be able to reuse selem
|
||||
* immediately.
|
||||
*/
|
||||
bpf_mem_cache_free(&smap->selem_ma, selem);
|
||||
call_rcu(&selem->rcu, bpf_selem_free_rcu);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -264,7 +245,6 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
|
||||
static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
|
||||
{
|
||||
struct bpf_local_storage_elem *selem;
|
||||
struct bpf_local_storage_map *smap;
|
||||
struct hlist_node *n;
|
||||
|
||||
/* The "_safe" iteration is needed.
|
||||
@@ -272,10 +252,8 @@ static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
|
||||
* but bpf_selem_free will use the selem->rcu_head
|
||||
* which is union-ized with the selem->free_node.
|
||||
*/
|
||||
hlist_for_each_entry_safe(selem, n, list, free_node) {
|
||||
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
|
||||
bpf_selem_free(selem, smap, reuse_now);
|
||||
}
|
||||
hlist_for_each_entry_safe(selem, n, list, free_node)
|
||||
bpf_selem_free(selem, reuse_now);
|
||||
}
|
||||
|
||||
/* local_storage->lock must be held and selem->local_storage == local_storage.
|
||||
@@ -284,7 +262,7 @@ static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
|
||||
*/
|
||||
static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
|
||||
struct bpf_local_storage_elem *selem,
|
||||
bool uncharge_mem, struct hlist_head *free_selem_list)
|
||||
struct hlist_head *free_selem_list)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
bool free_local_storage;
|
||||
@@ -297,8 +275,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
|
||||
* The owner may be freed once the last selem is unlinked
|
||||
* from local_storage.
|
||||
*/
|
||||
if (uncharge_mem)
|
||||
mem_uncharge(smap, owner, smap->elem_size);
|
||||
mem_uncharge(smap, owner, smap->elem_size);
|
||||
|
||||
free_local_storage = hlist_is_singular_node(&selem->snode,
|
||||
&local_storage->list);
|
||||
@@ -336,47 +313,11 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
|
||||
return free_local_storage;
|
||||
}
|
||||
|
||||
static bool check_storage_bpf_ma(struct bpf_local_storage *local_storage,
|
||||
struct bpf_local_storage_map *storage_smap,
|
||||
struct bpf_local_storage_elem *selem)
|
||||
{
|
||||
|
||||
struct bpf_local_storage_map *selem_smap;
|
||||
|
||||
/* local_storage->smap may be NULL. If it is, get the bpf_ma
|
||||
* from any selem in the local_storage->list. The bpf_ma of all
|
||||
* local_storage and selem should have the same value
|
||||
* for the same map type.
|
||||
*
|
||||
* If the local_storage->list is already empty, the caller will not
|
||||
* care about the bpf_ma value also because the caller is not
|
||||
* responsible to free the local_storage.
|
||||
*/
|
||||
|
||||
if (storage_smap)
|
||||
return storage_smap->bpf_ma;
|
||||
|
||||
if (!selem) {
|
||||
struct hlist_node *n;
|
||||
|
||||
n = rcu_dereference_check(hlist_first_rcu(&local_storage->list),
|
||||
bpf_rcu_lock_held());
|
||||
if (!n)
|
||||
return false;
|
||||
|
||||
selem = hlist_entry(n, struct bpf_local_storage_elem, snode);
|
||||
}
|
||||
selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
|
||||
|
||||
return selem_smap->bpf_ma;
|
||||
}
|
||||
|
||||
static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
|
||||
bool reuse_now)
|
||||
{
|
||||
struct bpf_local_storage_map *storage_smap;
|
||||
struct bpf_local_storage *local_storage;
|
||||
bool bpf_ma, free_local_storage = false;
|
||||
bool free_local_storage = false;
|
||||
HLIST_HEAD(selem_free_list);
|
||||
unsigned long flags;
|
||||
|
||||
@@ -386,20 +327,17 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
|
||||
|
||||
local_storage = rcu_dereference_check(selem->local_storage,
|
||||
bpf_rcu_lock_held());
|
||||
storage_smap = rcu_dereference_check(local_storage->smap,
|
||||
bpf_rcu_lock_held());
|
||||
bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem);
|
||||
|
||||
raw_spin_lock_irqsave(&local_storage->lock, flags);
|
||||
if (likely(selem_linked_to_storage(selem)))
|
||||
free_local_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, true, &selem_free_list);
|
||||
local_storage, selem, &selem_free_list);
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
|
||||
bpf_selem_free_list(&selem_free_list, reuse_now);
|
||||
|
||||
if (free_local_storage)
|
||||
bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now);
|
||||
bpf_local_storage_free(local_storage, reuse_now);
|
||||
}
|
||||
|
||||
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
|
||||
@@ -434,7 +372,6 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&b->lock, flags);
|
||||
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
|
||||
hlist_add_head_rcu(&selem->map_node, &b->list);
|
||||
raw_spin_unlock_irqrestore(&b->lock, flags);
|
||||
}
|
||||
@@ -493,8 +430,9 @@ int bpf_local_storage_alloc(void *owner,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (smap->bpf_ma)
|
||||
storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags);
|
||||
if (smap->use_kmalloc_nolock)
|
||||
storage = bpf_map_kmalloc_nolock(&smap->map, sizeof(*storage),
|
||||
__GFP_ZERO, NUMA_NO_NODE);
|
||||
else
|
||||
storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
|
||||
gfp_flags | __GFP_NOWARN);
|
||||
@@ -507,6 +445,7 @@ int bpf_local_storage_alloc(void *owner,
|
||||
INIT_HLIST_HEAD(&storage->list);
|
||||
raw_spin_lock_init(&storage->lock);
|
||||
storage->owner = owner;
|
||||
storage->use_kmalloc_nolock = smap->use_kmalloc_nolock;
|
||||
|
||||
bpf_selem_link_storage_nolock(storage, first_selem);
|
||||
bpf_selem_link_map(smap, first_selem);
|
||||
@@ -528,22 +467,12 @@ int bpf_local_storage_alloc(void *owner,
|
||||
bpf_selem_unlink_map(first_selem);
|
||||
err = -EAGAIN;
|
||||
goto uncharge;
|
||||
|
||||
/* Note that even first_selem was linked to smap's
|
||||
* bucket->list, first_selem can be freed immediately
|
||||
* (instead of kfree_rcu) because
|
||||
* bpf_local_storage_map_free() does a
|
||||
* synchronize_rcu_mult (waiting for both sleepable and
|
||||
* normal programs) before walking the bucket->list.
|
||||
* Hence, no one is accessing selem from the
|
||||
* bucket->list under rcu_read_lock().
|
||||
*/
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
uncharge:
|
||||
bpf_local_storage_free(storage, smap, smap->bpf_ma, true);
|
||||
bpf_local_storage_free(storage, true);
|
||||
mem_uncharge(smap, owner, sizeof(*storage));
|
||||
return err;
|
||||
}
|
||||
@@ -582,13 +511,13 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags);
|
||||
selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
|
||||
if (!selem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
|
||||
if (err) {
|
||||
bpf_selem_free(selem, smap, true);
|
||||
bpf_selem_free(selem, true);
|
||||
mem_uncharge(smap, owner, smap->elem_size);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
@@ -616,7 +545,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
/* A lookup has just been done before and concluded a new selem is
|
||||
* needed. The chance of an unnecessary alloc is unlikely.
|
||||
*/
|
||||
alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags);
|
||||
alloc_selem = selem = bpf_selem_alloc(smap, owner, value, swap_uptrs, gfp_flags);
|
||||
if (!alloc_selem)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@@ -656,7 +585,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
|
||||
if (old_sdata) {
|
||||
bpf_selem_unlink_map(SELEM(old_sdata));
|
||||
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
|
||||
true, &old_selem_free_list);
|
||||
&old_selem_free_list);
|
||||
}
|
||||
|
||||
unlock:
|
||||
@@ -664,7 +593,7 @@ unlock:
|
||||
bpf_selem_free_list(&old_selem_free_list, false);
|
||||
if (alloc_selem) {
|
||||
mem_uncharge(smap, owner, smap->elem_size);
|
||||
bpf_selem_free(alloc_selem, smap, true);
|
||||
bpf_selem_free(alloc_selem, true);
|
||||
}
|
||||
return err ? ERR_PTR(err) : SDATA(selem);
|
||||
}
|
||||
@@ -730,16 +659,12 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
|
||||
|
||||
void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
|
||||
{
|
||||
struct bpf_local_storage_map *storage_smap;
|
||||
struct bpf_local_storage_elem *selem;
|
||||
bool bpf_ma, free_storage = false;
|
||||
bool free_storage = false;
|
||||
HLIST_HEAD(free_selem_list);
|
||||
struct hlist_node *n;
|
||||
unsigned long flags;
|
||||
|
||||
storage_smap = rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held());
|
||||
bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL);
|
||||
|
||||
/* Neither the bpf_prog nor the bpf_map's syscall
|
||||
* could be modifying the local_storage->list now.
|
||||
* Thus, no elem can be added to or deleted from the
|
||||
@@ -762,14 +687,14 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
|
||||
* of the loop will set the free_cgroup_storage to true.
|
||||
*/
|
||||
free_storage = bpf_selem_unlink_storage_nolock(
|
||||
local_storage, selem, true, &free_selem_list);
|
||||
local_storage, selem, &free_selem_list);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
|
||||
|
||||
bpf_selem_free_list(&free_selem_list, true);
|
||||
|
||||
if (free_storage)
|
||||
bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true);
|
||||
bpf_local_storage_free(local_storage, true);
|
||||
}
|
||||
|
||||
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
|
||||
@@ -782,20 +707,10 @@ u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
|
||||
return usage;
|
||||
}
|
||||
|
||||
/* When bpf_ma == true, the bpf_mem_alloc is used to allocate and free memory.
|
||||
* A deadlock free allocator is useful for storage that the bpf prog can easily
|
||||
* get a hold of the owner PTR_TO_BTF_ID in any context. eg. bpf_get_current_task_btf.
|
||||
* The task and cgroup storage fall into this case. The bpf_mem_alloc reuses
|
||||
* memory immediately. To be reuse-immediate safe, the owner destruction
|
||||
* code path needs to go through a rcu grace period before calling
|
||||
* bpf_local_storage_destroy().
|
||||
*
|
||||
* When bpf_ma == false, the kmalloc and kfree are used.
|
||||
*/
|
||||
struct bpf_map *
|
||||
bpf_local_storage_map_alloc(union bpf_attr *attr,
|
||||
struct bpf_local_storage_cache *cache,
|
||||
bool bpf_ma)
|
||||
bool use_kmalloc_nolock)
|
||||
{
|
||||
struct bpf_local_storage_map *smap;
|
||||
unsigned int i;
|
||||
@@ -829,20 +744,9 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
|
||||
|
||||
/* In PREEMPT_RT, kmalloc(GFP_ATOMIC) is still not safe in non
|
||||
* preemptible context. Thus, enforce all storages to use
|
||||
* bpf_mem_alloc when CONFIG_PREEMPT_RT is enabled.
|
||||
* kmalloc_nolock() when CONFIG_PREEMPT_RT is enabled.
|
||||
*/
|
||||
smap->bpf_ma = IS_ENABLED(CONFIG_PREEMPT_RT) ? true : bpf_ma;
|
||||
if (smap->bpf_ma) {
|
||||
err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false);
|
||||
if (err)
|
||||
goto free_smap;
|
||||
|
||||
err = bpf_mem_alloc_init(&smap->storage_ma, sizeof(struct bpf_local_storage), false);
|
||||
if (err) {
|
||||
bpf_mem_alloc_destroy(&smap->selem_ma);
|
||||
goto free_smap;
|
||||
}
|
||||
}
|
||||
smap->use_kmalloc_nolock = IS_ENABLED(CONFIG_PREEMPT_RT) ? true : use_kmalloc_nolock;
|
||||
|
||||
smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
|
||||
return &smap->map;
|
||||
@@ -912,12 +816,9 @@ void bpf_local_storage_map_free(struct bpf_map *map,
|
||||
*/
|
||||
synchronize_rcu();
|
||||
|
||||
if (smap->bpf_ma) {
|
||||
if (smap->use_kmalloc_nolock) {
|
||||
rcu_barrier_tasks_trace();
|
||||
if (!rcu_trace_implies_rcu_gp())
|
||||
rcu_barrier();
|
||||
bpf_mem_alloc_destroy(&smap->selem_ma);
|
||||
bpf_mem_alloc_destroy(&smap->storage_ma);
|
||||
rcu_barrier();
|
||||
}
|
||||
kvfree(smap->buckets);
|
||||
bpf_map_area_free(smap);
|
||||
|
||||
@@ -51,6 +51,7 @@ BTF_ID(func, bpf_lsm_key_getsecurity)
|
||||
BTF_ID(func, bpf_lsm_audit_rule_match)
|
||||
#endif
|
||||
BTF_ID(func, bpf_lsm_ismaclabel)
|
||||
BTF_ID(func, bpf_lsm_file_alloc_security)
|
||||
BTF_SET_END(bpf_lsm_disabled_hooks)
|
||||
|
||||
/* List of LSM hooks that should operate on 'current' cgroup regardless
|
||||
|
||||
@@ -1450,6 +1450,23 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
|
||||
bpf_prog_clone_free(fp_other);
|
||||
}
|
||||
|
||||
static void adjust_insn_arrays(struct bpf_prog *prog, u32 off, u32 len)
|
||||
{
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
struct bpf_map *map;
|
||||
int i;
|
||||
|
||||
if (len <= 1)
|
||||
return;
|
||||
|
||||
for (i = 0; i < prog->aux->used_map_cnt; i++) {
|
||||
map = prog->aux->used_maps[i];
|
||||
if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY)
|
||||
bpf_insn_array_adjust(map, off, len);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
|
||||
{
|
||||
struct bpf_insn insn_buff[16], aux[2];
|
||||
@@ -1505,6 +1522,9 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
|
||||
clone = tmp;
|
||||
insn_delta = rewritten - 1;
|
||||
|
||||
/* Instructions arrays must be updated using absolute xlated offsets */
|
||||
adjust_insn_arrays(clone, prog->aux->subprog_start + i, rewritten);
|
||||
|
||||
/* Walk new program and skip insns we just inserted. */
|
||||
insn = clone->insnsi + i + insn_delta;
|
||||
insn_cnt += insn_delta;
|
||||
@@ -1688,6 +1708,7 @@ bool bpf_opcode_in_insntable(u8 code)
|
||||
[BPF_LD | BPF_IND | BPF_B] = true,
|
||||
[BPF_LD | BPF_IND | BPF_H] = true,
|
||||
[BPF_LD | BPF_IND | BPF_W] = true,
|
||||
[BPF_JMP | BPF_JA | BPF_X] = true,
|
||||
[BPF_JMP | BPF_JCOND] = true,
|
||||
};
|
||||
#undef BPF_INSN_3_TBL
|
||||
@@ -3129,8 +3150,9 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
|
||||
void *addr1, void *addr2)
|
||||
int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
|
||||
enum bpf_text_poke_type new_t, void *old_addr,
|
||||
void *new_addr)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
@@ -358,6 +358,9 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
|
||||
} else if (insn->code == (BPF_JMP | BPF_JA)) {
|
||||
verbose(cbs->private_data, "(%02x) goto pc%+d\n",
|
||||
insn->code, insn->off);
|
||||
} else if (insn->code == (BPF_JMP | BPF_JA | BPF_X)) {
|
||||
verbose(cbs->private_data, "(%02x) gotox r%d\n",
|
||||
insn->code, insn->dst_reg);
|
||||
} else if (insn->code == (BPF_JMP | BPF_JCOND) &&
|
||||
insn->src_reg == BPF_MAY_GOTO) {
|
||||
verbose(cbs->private_data, "(%02x) may_goto pc%+d\n",
|
||||
|
||||
@@ -215,19 +215,6 @@ static bool htab_has_extra_elems(struct bpf_htab *htab)
|
||||
return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
|
||||
}
|
||||
|
||||
static void htab_free_internal_structs(struct bpf_htab *htab, struct htab_elem *elem)
|
||||
{
|
||||
if (btf_record_has_field(htab->map.record, BPF_TIMER))
|
||||
bpf_obj_free_timer(htab->map.record,
|
||||
htab_elem_value(elem, htab->map.key_size));
|
||||
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
|
||||
bpf_obj_free_workqueue(htab->map.record,
|
||||
htab_elem_value(elem, htab->map.key_size));
|
||||
if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
|
||||
bpf_obj_free_task_work(htab->map.record,
|
||||
htab_elem_value(elem, htab->map.key_size));
|
||||
}
|
||||
|
||||
static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
|
||||
{
|
||||
u32 num_entries = htab->map.max_entries;
|
||||
@@ -240,7 +227,8 @@ static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
|
||||
struct htab_elem *elem;
|
||||
|
||||
elem = get_htab_elem(htab, i);
|
||||
htab_free_internal_structs(htab, elem);
|
||||
bpf_map_free_internal_structs(&htab->map,
|
||||
htab_elem_value(elem, htab->map.key_size));
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
@@ -669,8 +657,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
struct htab_elem *l;
|
||||
u32 hash, key_size;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
|
||||
key_size = map->key_size;
|
||||
|
||||
@@ -947,15 +934,21 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
|
||||
static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
|
||||
void *value, bool onallcpus)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
if (!onallcpus) {
|
||||
/* copy true value_size bytes */
|
||||
copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
|
||||
ptr = this_cpu_ptr(pptr);
|
||||
copy_map_value(&htab->map, ptr, value);
|
||||
bpf_obj_free_fields(htab->map.record, ptr);
|
||||
} else {
|
||||
u32 size = round_up(htab->map.value_size, 8);
|
||||
int off = 0, cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
|
||||
ptr = per_cpu_ptr(pptr, cpu);
|
||||
copy_map_value_long(&htab->map, ptr, value + off);
|
||||
bpf_obj_free_fields(htab->map.record, ptr);
|
||||
off += size;
|
||||
}
|
||||
}
|
||||
@@ -1098,8 +1091,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
/* unknown flags */
|
||||
return -EINVAL;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
|
||||
key_size = map->key_size;
|
||||
|
||||
@@ -1206,8 +1198,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
|
||||
/* unknown flags */
|
||||
return -EINVAL;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
|
||||
key_size = map->key_size;
|
||||
|
||||
@@ -1275,8 +1266,7 @@ static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
|
||||
/* unknown flags */
|
||||
return -EINVAL;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
|
||||
key_size = map->key_size;
|
||||
|
||||
@@ -1338,8 +1328,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
|
||||
/* unknown flags */
|
||||
return -EINVAL;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
|
||||
key_size = map->key_size;
|
||||
|
||||
@@ -1416,8 +1405,7 @@ static long htab_map_delete_elem(struct bpf_map *map, void *key)
|
||||
u32 hash, key_size;
|
||||
int ret;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
|
||||
key_size = map->key_size;
|
||||
|
||||
@@ -1452,8 +1440,7 @@ static long htab_lru_map_delete_elem(struct bpf_map *map, void *key)
|
||||
u32 hash, key_size;
|
||||
int ret;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
|
||||
key_size = map->key_size;
|
||||
|
||||
@@ -1509,8 +1496,9 @@ static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
|
||||
struct htab_elem *l;
|
||||
|
||||
hlist_nulls_for_each_entry(l, n, head, hash_node) {
|
||||
/* We only free timer on uref dropping to zero */
|
||||
htab_free_internal_structs(htab, l);
|
||||
/* We only free internal structs on uref dropping to zero */
|
||||
bpf_map_free_internal_structs(&htab->map,
|
||||
htab_elem_value(l, htab->map.key_size));
|
||||
}
|
||||
cond_resched_rcu();
|
||||
}
|
||||
@@ -1521,13 +1509,14 @@ static void htab_map_free_internal_structs(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
|
||||
/* We only free timer and workqueue on uref dropping to zero */
|
||||
if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
|
||||
if (!htab_is_prealloc(htab))
|
||||
htab_free_malloced_internal_structs(htab);
|
||||
else
|
||||
htab_free_prealloced_internal_structs(htab);
|
||||
}
|
||||
/* We only free internal structs on uref dropping to zero */
|
||||
if (!bpf_map_has_internal_structs(map))
|
||||
return;
|
||||
|
||||
if (htab_is_prealloc(htab))
|
||||
htab_free_prealloced_internal_structs(htab);
|
||||
else
|
||||
htab_free_malloced_internal_structs(htab);
|
||||
}
|
||||
|
||||
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include <linux/verification.h>
|
||||
#include <linux/task_work.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/buildid.h>
|
||||
|
||||
#include "../../lib/kstrtox.h"
|
||||
|
||||
@@ -42,8 +43,7 @@
|
||||
*/
|
||||
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
|
||||
{
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
return (unsigned long) map->ops->map_lookup_elem(map, key);
|
||||
}
|
||||
|
||||
@@ -59,8 +59,7 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
|
||||
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
|
||||
void *, value, u64, flags)
|
||||
{
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
return map->ops->map_update_elem(map, key, value, flags);
|
||||
}
|
||||
|
||||
@@ -77,8 +76,7 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
|
||||
|
||||
BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
|
||||
{
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
return map->ops->map_delete_elem(map, key);
|
||||
}
|
||||
|
||||
@@ -134,8 +132,7 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = {
|
||||
|
||||
BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
|
||||
{
|
||||
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
|
||||
!rcu_read_lock_bh_held());
|
||||
WARN_ON_ONCE(!bpf_rcu_lock_held());
|
||||
return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
|
||||
}
|
||||
|
||||
@@ -777,9 +774,11 @@ int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs)
|
||||
{
|
||||
int nest_level;
|
||||
|
||||
preempt_disable();
|
||||
nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
|
||||
if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
|
||||
this_cpu_dec(bpf_bprintf_nest_level);
|
||||
preempt_enable();
|
||||
return -EBUSY;
|
||||
}
|
||||
*bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);
|
||||
@@ -792,6 +791,7 @@ void bpf_put_buffers(void)
|
||||
if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
|
||||
return;
|
||||
this_cpu_dec(bpf_bprintf_nest_level);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
|
||||
@@ -1660,6 +1660,13 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
|
||||
.arg2_btf_id = BPF_PTR_POISON,
|
||||
};
|
||||
|
||||
struct bpf_dynptr_file_impl {
|
||||
struct freader freader;
|
||||
/* 64 bit offset and size overriding 32 bit ones in bpf_dynptr_kern */
|
||||
u64 offset;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
/* Since the upper 8 bits of dynptr->size is reserved, the
|
||||
* maximum supported size is 2^24 - 1.
|
||||
*/
|
||||
@@ -1688,23 +1695,65 @@ static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *pt
|
||||
return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
|
||||
}
|
||||
|
||||
u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
|
||||
u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr)
|
||||
{
|
||||
if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) {
|
||||
struct bpf_dynptr_file_impl *df = ptr->data;
|
||||
|
||||
return df->size;
|
||||
}
|
||||
|
||||
return ptr->size & DYNPTR_SIZE_MASK;
|
||||
}
|
||||
|
||||
static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u32 new_size)
|
||||
static void bpf_dynptr_advance_offset(struct bpf_dynptr_kern *ptr, u64 off)
|
||||
{
|
||||
if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) {
|
||||
struct bpf_dynptr_file_impl *df = ptr->data;
|
||||
|
||||
df->offset += off;
|
||||
return;
|
||||
}
|
||||
ptr->offset += off;
|
||||
}
|
||||
|
||||
static void bpf_dynptr_set_size(struct bpf_dynptr_kern *ptr, u64 new_size)
|
||||
{
|
||||
u32 metadata = ptr->size & ~DYNPTR_SIZE_MASK;
|
||||
|
||||
ptr->size = new_size | metadata;
|
||||
if (bpf_dynptr_get_type(ptr) == BPF_DYNPTR_TYPE_FILE) {
|
||||
struct bpf_dynptr_file_impl *df = ptr->data;
|
||||
|
||||
df->size = new_size;
|
||||
return;
|
||||
}
|
||||
ptr->size = (u32)new_size | metadata;
|
||||
}
|
||||
|
||||
int bpf_dynptr_check_size(u32 size)
|
||||
int bpf_dynptr_check_size(u64 size)
|
||||
{
|
||||
return size > DYNPTR_MAX_SIZE ? -E2BIG : 0;
|
||||
}
|
||||
|
||||
static int bpf_file_fetch_bytes(struct bpf_dynptr_file_impl *df, u64 offset, void *buf, u64 len)
|
||||
{
|
||||
const void *ptr;
|
||||
|
||||
if (!buf)
|
||||
return -EINVAL;
|
||||
|
||||
df->freader.buf = buf;
|
||||
df->freader.buf_sz = len;
|
||||
ptr = freader_fetch(&df->freader, offset + df->offset, len);
|
||||
if (!ptr)
|
||||
return df->freader.err;
|
||||
|
||||
if (ptr != buf) /* Force copying into the buffer */
|
||||
memcpy(buf, ptr, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
|
||||
enum bpf_dynptr_type type, u32 offset, u32 size)
|
||||
{
|
||||
@@ -1719,7 +1768,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
|
||||
memset(ptr, 0, sizeof(*ptr));
|
||||
}
|
||||
|
||||
BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
|
||||
BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u64, size, u64, flags, struct bpf_dynptr_kern *, ptr)
|
||||
{
|
||||
int err;
|
||||
|
||||
@@ -1754,8 +1803,8 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
|
||||
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
|
||||
};
|
||||
|
||||
static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src,
|
||||
u32 offset, u64 flags)
|
||||
static int __bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr_kern *src,
|
||||
u64 offset, u64 flags)
|
||||
{
|
||||
enum bpf_dynptr_type type;
|
||||
int err;
|
||||
@@ -1785,14 +1834,16 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s
|
||||
case BPF_DYNPTR_TYPE_SKB_META:
|
||||
memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len);
|
||||
return 0;
|
||||
case BPF_DYNPTR_TYPE_FILE:
|
||||
return bpf_file_fetch_bytes(src->data, offset, dst, len);
|
||||
default:
|
||||
WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
|
||||
return -EFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
|
||||
u32, offset, u64, flags)
|
||||
BPF_CALL_5(bpf_dynptr_read, void *, dst, u64, len, const struct bpf_dynptr_kern *, src,
|
||||
u64, offset, u64, flags)
|
||||
{
|
||||
return __bpf_dynptr_read(dst, len, src, offset, flags);
|
||||
}
|
||||
@@ -1808,8 +1859,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
|
||||
.arg5_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
|
||||
u32 len, u64 flags)
|
||||
int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, void *src,
|
||||
u64 len, u64 flags)
|
||||
{
|
||||
enum bpf_dynptr_type type;
|
||||
int err;
|
||||
@@ -1852,8 +1903,8 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
|
||||
}
|
||||
}
|
||||
|
||||
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
|
||||
u32, len, u64, flags)
|
||||
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u64, offset, void *, src,
|
||||
u64, len, u64, flags)
|
||||
{
|
||||
return __bpf_dynptr_write(dst, offset, src, len, flags);
|
||||
}
|
||||
@@ -1869,7 +1920,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
|
||||
.arg5_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
|
||||
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u64, offset, u64, len)
|
||||
{
|
||||
enum bpf_dynptr_type type;
|
||||
int err;
|
||||
@@ -2684,12 +2735,12 @@ __bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid)
|
||||
* provided buffer, with its contents containing the data, if unable to obtain
|
||||
* direct pointer)
|
||||
*/
|
||||
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
|
||||
void *buffer__opt, u32 buffer__szk)
|
||||
__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u64 offset,
|
||||
void *buffer__opt, u64 buffer__szk)
|
||||
{
|
||||
const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
|
||||
enum bpf_dynptr_type type;
|
||||
u32 len = buffer__szk;
|
||||
u64 len = buffer__szk;
|
||||
int err;
|
||||
|
||||
if (!ptr->data)
|
||||
@@ -2723,6 +2774,9 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
|
||||
}
|
||||
case BPF_DYNPTR_TYPE_SKB_META:
|
||||
return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
|
||||
case BPF_DYNPTR_TYPE_FILE:
|
||||
err = bpf_file_fetch_bytes(ptr->data, offset, buffer__opt, buffer__szk);
|
||||
return err ? NULL : buffer__opt;
|
||||
default:
|
||||
WARN_ONCE(true, "unknown dynptr type %d\n", type);
|
||||
return NULL;
|
||||
@@ -2771,8 +2825,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
|
||||
* provided buffer, with its contents containing the data, if unable to obtain
|
||||
* direct pointer)
|
||||
*/
|
||||
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
|
||||
void *buffer__opt, u32 buffer__szk)
|
||||
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
|
||||
void *buffer__opt, u64 buffer__szk)
|
||||
{
|
||||
const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
|
||||
|
||||
@@ -2804,10 +2858,10 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
|
||||
return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end)
|
||||
__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u64 start, u64 end)
|
||||
{
|
||||
struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
|
||||
u32 size;
|
||||
u64 size;
|
||||
|
||||
if (!ptr->data || start > end)
|
||||
return -EINVAL;
|
||||
@@ -2817,7 +2871,7 @@ __bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end
|
||||
if (start > size || end > size)
|
||||
return -ERANGE;
|
||||
|
||||
ptr->offset += start;
|
||||
bpf_dynptr_advance_offset(ptr, start);
|
||||
bpf_dynptr_set_size(ptr, end - start);
|
||||
|
||||
return 0;
|
||||
@@ -2840,7 +2894,7 @@ __bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p)
|
||||
return __bpf_dynptr_is_rdonly(ptr);
|
||||
}
|
||||
|
||||
__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p)
|
||||
__bpf_kfunc u64 bpf_dynptr_size(const struct bpf_dynptr *p)
|
||||
{
|
||||
struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
|
||||
|
||||
@@ -2877,14 +2931,14 @@ __bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p,
|
||||
* Copies data from source dynptr to destination dynptr.
|
||||
* Returns 0 on success; negative error, otherwise.
|
||||
*/
|
||||
__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
|
||||
struct bpf_dynptr *src_ptr, u32 src_off, u32 size)
|
||||
__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u64 dst_off,
|
||||
struct bpf_dynptr *src_ptr, u64 src_off, u64 size)
|
||||
{
|
||||
struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr;
|
||||
struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr;
|
||||
void *src_slice, *dst_slice;
|
||||
char buf[256];
|
||||
u32 off;
|
||||
u64 off;
|
||||
|
||||
src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size);
|
||||
dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size);
|
||||
@@ -2906,7 +2960,7 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
|
||||
|
||||
off = 0;
|
||||
while (off < size) {
|
||||
u32 chunk_sz = min_t(u32, sizeof(buf), size - off);
|
||||
u64 chunk_sz = min_t(u64, sizeof(buf), size - off);
|
||||
int err;
|
||||
|
||||
err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0);
|
||||
@@ -2932,10 +2986,10 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
|
||||
* at @offset with the constant byte @val.
|
||||
* Returns 0 on success; negative error, otherwise.
|
||||
*/
|
||||
__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u32 offset, u32 size, u8 val)
|
||||
{
|
||||
__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u64 offset, u64 size, u8 val)
|
||||
{
|
||||
struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
|
||||
u32 chunk_sz, write_off;
|
||||
u64 chunk_sz, write_off;
|
||||
char buf[256];
|
||||
void* slice;
|
||||
int err;
|
||||
@@ -2954,11 +3008,11 @@ __bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
|
||||
return err;
|
||||
|
||||
/* Non-linear data under the dynptr, write from a local buffer */
|
||||
chunk_sz = min_t(u32, sizeof(buf), size);
|
||||
chunk_sz = min_t(u64, sizeof(buf), size);
|
||||
memset(buf, val, chunk_sz);
|
||||
|
||||
for (write_off = 0; write_off < size; write_off += chunk_sz) {
|
||||
chunk_sz = min_t(u32, sizeof(buf), size - write_off);
|
||||
chunk_sz = min_t(u64, sizeof(buf), size - write_off);
|
||||
err = __bpf_dynptr_write(ptr, offset + write_off, buf, chunk_sz, 0);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -3678,34 +3732,21 @@ err_out:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_strnstr - Find the first substring in a length-limited string
|
||||
* @s1__ign: The string to be searched
|
||||
* @s2__ign: The string to search for
|
||||
* @len: the maximum number of characters to search
|
||||
*
|
||||
* Return:
|
||||
* * >=0 - Index of the first character of the first occurrence of @s2__ign
|
||||
* within the first @len characters of @s1__ign
|
||||
* * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign
|
||||
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len)
static int __bpf_strnstr(const char *s1, const char *s2, size_t len,
			 bool ignore_case)
{
	char c1, c2;
	int i, j;

	if (!copy_from_kernel_nofault_allowed(s1__ign, 1) ||
	    !copy_from_kernel_nofault_allowed(s2__ign, 1)) {
	if (!copy_from_kernel_nofault_allowed(s1, 1) ||
	    !copy_from_kernel_nofault_allowed(s2, 1)) {
		return -ERANGE;
	}

	guard(pagefault)();
	for (i = 0; i < XATTR_SIZE_MAX; i++) {
		for (j = 0; i + j <= len && j < XATTR_SIZE_MAX; j++) {
			__get_kernel_nofault(&c2, s2__ign + j, char, err_out);
			__get_kernel_nofault(&c2, s2 + j, char, err_out);
			if (c2 == '\0')
				return i;
			/*
@@ -3715,7 +3756,13 @@ __bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len
			 */
			if (i + j == len)
				break;
			__get_kernel_nofault(&c1, s1__ign + j, char, err_out);
			__get_kernel_nofault(&c1, s1 + j, char, err_out);

			if (ignore_case) {
				c1 = tolower(c1);
				c2 = tolower(c2);
			}

			if (c1 == '\0')
				return -ENOENT;
			if (c1 != c2)
@@ -3725,7 +3772,7 @@ __bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len
			return -E2BIG;
		if (i + j == len)
			return -ENOENT;
		s1__ign++;
		s1++;
	}
	return -E2BIG;
err_out:
@@ -3747,8 +3794,69 @@ err_out:
 */
__bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
{
	return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
	return __bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX, false);
}

/**
 * bpf_strcasestr - Find the first substring in a string, ignoring the case of
 *   the characters
 * @s1__ign: The string to be searched
 * @s2__ign: The string to search for
 *
 * Return:
 * * >=0 - Index of the first character of the first occurrence of @s2__ign
 *   within @s1__ign
 * * %-ENOENT - @s2__ign is not a substring of @s1__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strcasestr(const char *s1__ign, const char *s2__ign)
{
	return __bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX, true);
}

/**
 * bpf_strnstr - Find the first substring in a length-limited string
 * @s1__ign: The string to be searched
 * @s2__ign: The string to search for
 * @len: the maximum number of characters to search
 *
 * Return:
 * * >=0 - Index of the first character of the first occurrence of @s2__ign
 *   within the first @len characters of @s1__ign
 * * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign,
			    size_t len)
{
	return __bpf_strnstr(s1__ign, s2__ign, len, false);
}

/**
 * bpf_strncasestr - Find the first substring in a length-limited string,
 *   ignoring the case of the characters
 * @s1__ign: The string to be searched
 * @s2__ign: The string to search for
 * @len: the maximum number of characters to search
 *
 * Return:
 * * >=0 - Index of the first character of the first occurrence of @s2__ign
 *   within the first @len characters of @s1__ign
 * * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strncasestr(const char *s1__ign, const char *s2__ign,
				size_t len)
{
	return __bpf_strnstr(s1__ign, s2__ign, len, true);
}

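The four kfuncs above are thin wrappers that only select the ignore_case flag of __bpf_strnstr(). As a rough usage sketch, not part of this series (the __ksym declarations, the __u64 spelling of the length argument, the section name and the attach point are illustrative assumptions), a tracing program could search the current task's comm case-insensitively like this:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* Illustrative declarations; real programs may pick these up from a
 * generated kfunc header instead. bpf_strcasestr() is the unbounded
 * variant, bpf_strncasestr() limits the searched length. */
extern int bpf_strcasestr(const char *s1__ign, const char *s2__ign) __ksym;
extern int bpf_strncasestr(const char *s1__ign, const char *s2__ign, __u64 len) __ksym;

char LICENSE[] SEC("license") = "GPL";

SEC("kprobe/do_nanosleep")
int match_comm(void *ctx)
{
	char comm[16];
	int idx;

	bpf_get_current_comm(comm, sizeof(comm));

	/* Case-insensitive search in a bounded, NUL-terminated buffer. */
	idx = bpf_strncasestr(comm, "ssh", sizeof(comm));
	if (idx >= 0)
		bpf_printk("ssh found in comm at offset %d", idx);

	/* Negative returns are errno-style: -ENOENT, -E2BIG, -ERANGE, ... */
	return 0;
}
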
#ifdef CONFIG_KEYS
|
||||
/**
|
||||
* bpf_lookup_user_key - lookup a key by its serial
|
||||
@@ -4206,6 +4314,54 @@ __bpf_kfunc int bpf_task_work_schedule_resume_impl(struct task_struct *task,
|
||||
return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME);
|
||||
}
|
||||
|
||||
static int make_file_dynptr(struct file *file, u32 flags, bool may_sleep,
|
||||
struct bpf_dynptr_kern *ptr)
|
||||
{
|
||||
struct bpf_dynptr_file_impl *state;
|
||||
|
||||
/* flags is currently unsupported */
|
||||
if (flags) {
|
||||
bpf_dynptr_set_null(ptr);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
state = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_dynptr_file_impl));
|
||||
if (!state) {
|
||||
bpf_dynptr_set_null(ptr);
|
||||
return -ENOMEM;
|
||||
}
|
||||
state->offset = 0;
|
||||
state->size = U64_MAX; /* Don't restrict size, as file may change anyways */
|
||||
freader_init_from_file(&state->freader, NULL, 0, file, may_sleep);
|
||||
bpf_dynptr_init(ptr, state, BPF_DYNPTR_TYPE_FILE, 0, 0);
|
||||
bpf_dynptr_set_rdonly(ptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit)
|
||||
{
|
||||
return make_file_dynptr(file, flags, false, (struct bpf_dynptr_kern *)ptr__uninit);
|
||||
}
|
||||
|
||||
int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit)
|
||||
{
|
||||
return make_file_dynptr(file, flags, true, (struct bpf_dynptr_kern *)ptr__uninit);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr)
|
||||
{
|
||||
struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)dynptr;
|
||||
struct bpf_dynptr_file_impl *df = ptr->data;
|
||||
|
||||
if (!df)
|
||||
return 0;
|
||||
|
||||
freader_cleanup(&df->freader);
|
||||
bpf_mem_free(&bpf_global_ma, df);
|
||||
bpf_dynptr_set_null(ptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
__bpf_kfunc_end_defs();
|
||||
|
||||
static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work)
|
||||
@@ -4376,13 +4532,17 @@ BTF_ID_FLAGS(func, bpf_strnlen);
|
||||
BTF_ID_FLAGS(func, bpf_strspn);
|
||||
BTF_ID_FLAGS(func, bpf_strcspn);
|
||||
BTF_ID_FLAGS(func, bpf_strstr);
|
||||
BTF_ID_FLAGS(func, bpf_strcasestr);
|
||||
BTF_ID_FLAGS(func, bpf_strnstr);
|
||||
BTF_ID_FLAGS(func, bpf_strncasestr);
|
||||
#if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS)
|
||||
BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU)
|
||||
#endif
|
||||
BTF_ID_FLAGS(func, bpf_stream_vprintk_impl, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_task_work_schedule_signal_impl, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_task_work_schedule_resume_impl, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_dynptr_from_file, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_dynptr_file_discard)
|
||||
BTF_KFUNCS_END(common_btf_ids)
|
||||
|
||||
static const struct btf_kfunc_id_set common_kfunc_set = {
|
||||
@@ -4423,7 +4583,7 @@ late_initcall(kfunc_init);
|
||||
/* Get a pointer to dynptr data up to len bytes for read only access. If
|
||||
* the dynptr doesn't have continuous data up to len bytes, return NULL.
|
||||
*/
|
||||
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len)
|
||||
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len)
|
||||
{
|
||||
const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr;
|
||||
|
||||
@@ -4434,9 +4594,19 @@ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len)
|
||||
* the dynptr doesn't have continuous data up to len bytes, or the dynptr
|
||||
* is read only, return NULL.
|
||||
*/
|
||||
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len)
|
||||
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len)
|
||||
{
|
||||
if (__bpf_dynptr_is_rdonly(ptr))
|
||||
return NULL;
|
||||
return (void *)__bpf_dynptr_data(ptr, len);
|
||||
}
|
||||
|
||||
void bpf_map_free_internal_structs(struct bpf_map *map, void *val)
|
||||
{
|
||||
if (btf_record_has_field(map->record, BPF_TIMER))
|
||||
bpf_obj_free_timer(map->record, val);
|
||||
if (btf_record_has_field(map->record, BPF_WORKQUEUE))
|
||||
bpf_obj_free_workqueue(map->record, val);
|
||||
if (btf_record_has_field(map->record, BPF_TASK_WORK))
|
||||
bpf_obj_free_task_work(map->record, val);
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@
 * - read and write marks propagation.
 * - The propagation phase is a textbook live variable data flow analysis:
 *
 *   state[cc, i].live_after = U [state[cc, s].live_before for s in insn_successors(i)]
 *   state[cc, i].live_after = U [state[cc, s].live_before for s in bpf_insn_successors(i)]
 *   state[cc, i].live_before =
 *     (state[cc, i].live_after / state[cc, i].must_write) U state[i].may_read
 *
@@ -54,7 +54,7 @@
 * The equation for "must_write_acc" propagation looks as follows:
 *
 *   state[cc, i].must_write_acc =
 *     ∩ [state[cc, s].must_write_acc for s in insn_successors(i)]
 *     ∩ [state[cc, s].must_write_acc for s in bpf_insn_successors(i)]
 *     U state[cc, i].must_write
 *
 * (An intersection of all "must_write_acc" for instruction successors
@@ -447,7 +447,12 @@ int bpf_jmp_offset(struct bpf_insn *insn)
__diag_push();
__diag_ignore_all("-Woverride-init", "Allow field initialization overrides for opcode_info_tbl");

inline int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2])
/*
 * Returns an array of instructions succ, with succ->items[0], ...,
 * succ->items[n-1] with successor instructions, where n=succ->cnt
 */
inline struct bpf_iarray *
bpf_insn_successors(struct bpf_verifier_env *env, u32 idx)
{
	static const struct opcode_info {
		bool can_jump;
@@ -474,19 +479,29 @@ inline int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2])
		_J(BPF_JSET, {.can_jump = true, .can_fallthrough = true}),
#undef _J
	};
	struct bpf_prog *prog = env->prog;
	struct bpf_insn *insn = &prog->insnsi[idx];
	const struct opcode_info *opcode_info;
	int i = 0, insn_sz;
	struct bpf_iarray *succ, *jt;
	int insn_sz;

	jt = env->insn_aux_data[idx].jt;
	if (unlikely(jt))
		return jt;

	/* pre-allocated array of size up to 2; reset cnt, as it may have been used already */
	succ = env->succ;
	succ->cnt = 0;

	opcode_info = &opcode_info_tbl[BPF_CLASS(insn->code) | BPF_OP(insn->code)];
	insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
	if (opcode_info->can_fallthrough)
		succ[i++] = idx + insn_sz;
		succ->items[succ->cnt++] = idx + insn_sz;

	if (opcode_info->can_jump)
		succ[i++] = idx + bpf_jmp_offset(insn) + 1;
		succ->items[succ->cnt++] = idx + bpf_jmp_offset(insn) + 1;

	return i;
	return succ;
}

__diag_pop();
@@ -524,6 +539,8 @@ static int propagate_to_outer_instance(struct bpf_verifier_env *env,

	this_subprog_start = callchain_subprog_start(callchain);
	outer_instance = get_outer_instance(env, instance);
	if (IS_ERR(outer_instance))
		return PTR_ERR(outer_instance);
	callsite = callchain->callsites[callchain->curframe - 1];

	reset_stack_write_marks(env, outer_instance, callsite);
@@ -546,11 +563,12 @@ static inline bool update_insn(struct bpf_verifier_env *env,
	struct bpf_insn_aux_data *aux = env->insn_aux_data;
	u64 new_before, new_after, must_write_acc;
	struct per_frame_masks *insn, *succ_insn;
	u32 succ_num, s, succ[2];
	struct bpf_iarray *succ;
	u32 s;
	bool changed;

	succ_num = bpf_insn_successors(env->prog, insn_idx, succ);
	if (unlikely(succ_num == 0))
	succ = bpf_insn_successors(env, insn_idx);
	if (succ->cnt == 0)
		return false;

	changed = false;
@@ -562,8 +580,8 @@ static inline bool update_insn(struct bpf_verifier_env *env,
	 * of successors plus all "must_write" slots of instruction itself.
	 */
	must_write_acc = U64_MAX;
	for (s = 0; s < succ_num; ++s) {
		succ_insn = get_frame_masks(instance, frame, succ[s]);
	for (s = 0; s < succ->cnt; ++s) {
		succ_insn = get_frame_masks(instance, frame, succ->items[s]);
		new_after |= succ_insn->live_before;
		must_write_acc &= succ_insn->must_write_acc;
	}

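For readers following the equations quoted in the first two hunks, here is a minimal, self-contained sketch of the same live-variable fixed-point iteration on plain bitmasks. The names (insn_masks, the successors callback, num_insns) are illustrative only and deliberately do not mirror the verifier's bpf_iarray/per_frame_masks plumbing shown above.

#include <stdbool.h>
#include <stdint.h>

struct insn_masks {
	uint64_t may_read;
	uint64_t must_write;
	uint64_t live_before;
	uint64_t live_after;
};

/* Iterate to a fixed point; the verifier walks instructions in a smarter
 * order, but the update per instruction is the same as the equations:
 *   live_after(i)  = U live_before(s) for s in successors(i)
 *   live_before(i) = (live_after(i) \ must_write(i)) U may_read(i)
 */
static void propagate(struct insn_masks *m, int num_insns,
		      int (*successors)(int idx, int succ[2]))
{
	bool changed = true;

	while (changed) {
		changed = false;
		for (int i = 0; i < num_insns; i++) {
			int succ[2], n = successors(i, succ);
			uint64_t after = 0, before;

			for (int s = 0; s < n; s++)
				after |= m[succ[s]].live_before;
			before = (after & ~m[i].must_write) | m[i].may_read;
			if (before != m[i].live_before || after != m[i].live_after) {
				m[i].live_before = before;
				m[i].live_after = after;
				changed = true;
			}
		}
	}
}
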
@@ -461,6 +461,7 @@ const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type)
		[PTR_TO_ARENA]		= "arena",
		[PTR_TO_BUF]		= "buf",
		[PTR_TO_FUNC]		= "func",
		[PTR_TO_INSN]		= "insn",
		[PTR_TO_MAP_KEY]	= "map_key",
		[CONST_PTR_TO_DYNPTR]	= "dynptr_ptr",
	};
@@ -500,6 +501,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type)
		return "xdp";
	case BPF_DYNPTR_TYPE_SKB_META:
		return "skb_meta";
	case BPF_DYNPTR_TYPE_FILE:
		return "file";
	case BPF_DYNPTR_TYPE_INVALID:
		return "<invalid>";
	default:

@@ -2,7 +2,6 @@
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <linux/interval_tree_generic.h>
#include <linux/slab.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/bpf.h>
#include "range_tree.h"

@@ -21,7 +20,7 @@
 * in commit 6772fcc8890a ("xfs: convert xbitmap to interval tree").
 *
 * The implementation relies on external lock to protect rbtree-s.
 * The alloc/free of range_node-s is done via bpf_mem_alloc.
 * The alloc/free of range_node-s is done via kmalloc_nolock().
 *
 * bpf arena is using range_tree to represent unallocated slots.
 * At init time:
@@ -150,9 +149,7 @@ int range_tree_clear(struct range_tree *rt, u32 start, u32 len)
			range_it_insert(rn, rt);

			/* Add a range */
			migrate_disable();
			new_rn = bpf_mem_alloc(&bpf_global_ma, sizeof(struct range_node));
			migrate_enable();
			new_rn = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
			if (!new_rn)
				return -ENOMEM;
			new_rn->rn_start = last + 1;
@@ -172,9 +169,7 @@ int range_tree_clear(struct range_tree *rt, u32 start, u32 len)
		} else {
			/* in the middle of the clearing range */
			range_it_remove(rn, rt);
			migrate_disable();
			bpf_mem_free(&bpf_global_ma, rn);
			migrate_enable();
			kfree_nolock(rn);
		}
	}
	return 0;
@@ -227,9 +222,7 @@ int range_tree_set(struct range_tree *rt, u32 start, u32 len)
		range_it_remove(right, rt);
		left->rn_last = right->rn_last;
		range_it_insert(left, rt);
		migrate_disable();
		bpf_mem_free(&bpf_global_ma, right);
		migrate_enable();
		kfree_nolock(right);
	} else if (left) {
		/* Combine with the left range */
		range_it_remove(left, rt);
@@ -241,9 +234,7 @@ int range_tree_set(struct range_tree *rt, u32 start, u32 len)
		right->rn_start = start;
		range_it_insert(right, rt);
	} else {
		migrate_disable();
		left = bpf_mem_alloc(&bpf_global_ma, sizeof(struct range_node));
		migrate_enable();
		left = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
		if (!left)
			return -ENOMEM;
		left->rn_start = start;
@@ -259,7 +250,7 @@ void range_tree_destroy(struct range_tree *rt)

	while ((rn = range_it_iter_first(rt, 0, -1U))) {
		range_it_remove(rn, rt);
		bpf_mem_free(&bpf_global_ma, rn);
		kfree_nolock(rn);
	}
}

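As a usage sketch of the "unallocated slots" bookkeeping mentioned in the header comment: an arena-style caller marks a whole range free at init and then carves allocations out of it. This is only a sketch based on the functions visible in this hunk; range_tree_find() and its return convention are assumptions, not code from the diff.

/* Hedged sketch: range_tree_find() is assumed to return the start of a
 * free run of @len slots, or a negative value if none is available. */
static int arena_alloc_slots(struct range_tree *rt, u32 len, u32 *start_out)
{
	s64 start;

	start = range_tree_find(rt, len);
	if (start < 0)
		return -ENOMEM;
	*start_out = start;
	/* Drop the run from the "free" set ... */
	return range_tree_clear(rt, start, len);
}

static void arena_free_slots(struct range_tree *rt, u32 start, u32 len)
{
	/* ... and put it back when the caller releases it. */
	range_tree_set(rt, start, len);
}
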
@@ -13,7 +13,7 @@
#include <linux/btf_ids.h>
#include <asm/rqspinlock.h>

#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE | BPF_F_RB_OVERWRITE)

/* non-mmap()'able part of bpf_ringbuf (everything up to consumer page) */
#define RINGBUF_PGOFF \
@@ -30,6 +30,7 @@ struct bpf_ringbuf {
	u64 mask;
	struct page **pages;
	int nr_pages;
	bool overwrite_mode;
	rqspinlock_t spinlock ____cacheline_aligned_in_smp;
	/* For user-space producer ring buffers, an atomic_t busy bit is used
	 * to synchronize access to the ring buffers in the kernel, rather than
@@ -73,6 +74,7 @@ struct bpf_ringbuf {
	unsigned long consumer_pos __aligned(PAGE_SIZE);
	unsigned long producer_pos __aligned(PAGE_SIZE);
	unsigned long pending_pos;
	unsigned long overwrite_pos; /* position after the last overwritten record */
	char data[] __aligned(PAGE_SIZE);
};

@@ -166,7 +168,7 @@ static void bpf_ringbuf_notify(struct irq_work *work)
 * considering that the maximum value of data_sz is (4GB - 1), there
 * will be no overflow, so just note the size limit in the comments.
 */
static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node, bool overwrite_mode)
{
	struct bpf_ringbuf *rb;

@@ -183,17 +185,25 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
	rb->consumer_pos = 0;
	rb->producer_pos = 0;
	rb->pending_pos = 0;
	rb->overwrite_mode = overwrite_mode;

	return rb;
}

static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
{
	bool overwrite_mode = false;
	struct bpf_ringbuf_map *rb_map;

	if (attr->map_flags & ~RINGBUF_CREATE_FLAG_MASK)
		return ERR_PTR(-EINVAL);

	if (attr->map_flags & BPF_F_RB_OVERWRITE) {
		if (attr->map_type != BPF_MAP_TYPE_RINGBUF)
			return ERR_PTR(-EINVAL);
		overwrite_mode = true;
	}

	if (attr->key_size || attr->value_size ||
	    !is_power_of_2(attr->max_entries) ||
	    !PAGE_ALIGNED(attr->max_entries))
@@ -205,7 +215,7 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)

	bpf_map_init_from_attr(&rb_map->map, attr);

	rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node);
	rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node, overwrite_mode);
	if (!rb_map->rb) {
		bpf_map_area_free(rb_map);
		return ERR_PTR(-ENOMEM);
@@ -295,13 +305,26 @@ static int ringbuf_map_mmap_user(struct bpf_map *map, struct vm_area_struct *vma
	return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF);
}

/*
 * Return an estimate of the available data in the ring buffer.
 * Note: the returned value can exceed the actual ring buffer size because the
 * function is not synchronized with the producer. The producer acquires the
 * ring buffer's spinlock, but this function does not.
 */
static unsigned long ringbuf_avail_data_sz(struct bpf_ringbuf *rb)
{
	unsigned long cons_pos, prod_pos;
	unsigned long cons_pos, prod_pos, over_pos;

	cons_pos = smp_load_acquire(&rb->consumer_pos);
	prod_pos = smp_load_acquire(&rb->producer_pos);
	return prod_pos - cons_pos;

	if (unlikely(rb->overwrite_mode)) {
		over_pos = smp_load_acquire(&rb->overwrite_pos);
		prod_pos = smp_load_acquire(&rb->producer_pos);
		return prod_pos - max(cons_pos, over_pos);
	} else {
		prod_pos = smp_load_acquire(&rb->producer_pos);
		return prod_pos - cons_pos;
	}
}

static u32 ringbuf_total_data_sz(const struct bpf_ringbuf *rb)
@@ -404,11 +427,43 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
	return (void*)((addr & PAGE_MASK) - off);
}

static bool bpf_ringbuf_has_space(const struct bpf_ringbuf *rb,
				  unsigned long new_prod_pos,
				  unsigned long cons_pos,
				  unsigned long pend_pos)
{
	/*
	 * No space if oldest not yet committed record until the newest
	 * record span more than (ringbuf_size - 1).
	 */
	if (new_prod_pos - pend_pos > rb->mask)
		return false;

	/* Ok, we have space in overwrite mode */
	if (unlikely(rb->overwrite_mode))
		return true;

	/*
	 * No space if producer position advances more than (ringbuf_size - 1)
	 * ahead of consumer position when not in overwrite mode.
	 */
	if (new_prod_pos - cons_pos > rb->mask)
		return false;

	return true;
}

static u32 bpf_ringbuf_round_up_hdr_len(u32 hdr_len)
{
	hdr_len &= ~BPF_RINGBUF_DISCARD_BIT;
	return round_up(hdr_len + BPF_RINGBUF_HDR_SZ, 8);
}

static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
{
	unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
	unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, over_pos, flags;
	struct bpf_ringbuf_hdr *hdr;
	u32 len, pg_off, tmp_size, hdr_len;
	u32 len, pg_off, hdr_len;

	if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
		return NULL;
@@ -431,24 +486,43 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
		hdr_len = READ_ONCE(hdr->len);
		if (hdr_len & BPF_RINGBUF_BUSY_BIT)
			break;
		tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
		tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
		pend_pos += tmp_size;
		pend_pos += bpf_ringbuf_round_up_hdr_len(hdr_len);
	}
	rb->pending_pos = pend_pos;

	/* check for out of ringbuf space:
	 * - by ensuring producer position doesn't advance more than
	 *   (ringbuf_size - 1) ahead
	 * - by ensuring oldest not yet committed record until newest
	 *   record does not span more than (ringbuf_size - 1)
	 */
	if (new_prod_pos - cons_pos > rb->mask ||
	    new_prod_pos - pend_pos > rb->mask) {
	if (!bpf_ringbuf_has_space(rb, new_prod_pos, cons_pos, pend_pos)) {
		raw_res_spin_unlock_irqrestore(&rb->spinlock, flags);
		return NULL;
	}

	/*
	 * In overwrite mode, advance overwrite_pos when the ring buffer is full.
	 * The key points are to stay on record boundaries and consume enough records
	 * to fit the new one.
	 */
	if (unlikely(rb->overwrite_mode)) {
		over_pos = rb->overwrite_pos;
		while (new_prod_pos - over_pos > rb->mask) {
			hdr = (void *)rb->data + (over_pos & rb->mask);
			hdr_len = READ_ONCE(hdr->len);
			/*
			 * The bpf_ringbuf_has_space() check above ensures we won't
			 * step over a record currently being worked on by another
			 * producer.
			 */
			over_pos += bpf_ringbuf_round_up_hdr_len(hdr_len);
		}
		/*
		 * smp_store_release(&rb->producer_pos, new_prod_pos) at
		 * the end of the function ensures that when consumer sees
		 * the updated rb->producer_pos, it always sees the updated
		 * rb->overwrite_pos, so when consumer reads overwrite_pos
		 * after smp_load_acquire(r->producer_pos), the overwrite_pos
		 * will always be valid.
		 */
		WRITE_ONCE(rb->overwrite_pos, over_pos);
	}

	hdr = (void *)rb->data + (prod_pos & rb->mask);
	pg_off = bpf_ringbuf_rec_pg_off(rb, hdr);
	hdr->len = size | BPF_RINGBUF_BUSY_BIT;
@@ -578,6 +652,8 @@ BPF_CALL_2(bpf_ringbuf_query, struct bpf_map *, map, u64, flags)
		return smp_load_acquire(&rb->consumer_pos);
	case BPF_RB_PROD_POS:
		return smp_load_acquire(&rb->producer_pos);
	case BPF_RB_OVERWRITE_POS:
		return smp_load_acquire(&rb->overwrite_pos);
	default:
		return 0;
	}

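A hedged BPF-side sketch of how a program might use the new BPF_RB_OVERWRITE_POS query to estimate the unread backlog of an overwrite-mode ring buffer, mirroring the ringbuf_avail_data_sz() logic above. The map name, size and attach point are illustrative assumptions, not taken from this series.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 256 * 1024);
	__uint(map_flags, BPF_F_RB_OVERWRITE);	/* new flag from this series */
} events SEC(".maps");

SEC("kprobe/do_nanosleep")
int estimate_backlog(void *ctx)
{
	__u64 cons = bpf_ringbuf_query(&events, BPF_RB_CONS_POS);
	__u64 prod = bpf_ringbuf_query(&events, BPF_RB_PROD_POS);
	__u64 over = bpf_ringbuf_query(&events, BPF_RB_OVERWRITE_POS);
	__u64 start = over > cons ? over : cons;

	/* Data that has been produced but neither consumed nor overwritten. */
	bpf_printk("ringbuf backlog ~%llu bytes", prod - start);
	return 0;
}
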
@@ -89,15 +89,14 @@ struct rqspinlock_timeout {
|
||||
DEFINE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks);
|
||||
EXPORT_SYMBOL_GPL(rqspinlock_held_locks);
|
||||
|
||||
static bool is_lock_released(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts)
|
||||
static bool is_lock_released(rqspinlock_t *lock, u32 mask)
|
||||
{
|
||||
if (!(atomic_read_acquire(&lock->val) & (mask)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask,
|
||||
struct rqspinlock_timeout *ts)
|
||||
static noinline int check_deadlock_AA(rqspinlock_t *lock)
|
||||
{
|
||||
struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
|
||||
int cnt = min(RES_NR_HELD, rqh->cnt);
|
||||
@@ -118,8 +117,7 @@ static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask,
|
||||
* more locks, which reduce to ABBA). This is not exhaustive, and we rely on
|
||||
* timeouts as the final line of defense.
|
||||
*/
|
||||
static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
|
||||
struct rqspinlock_timeout *ts)
|
||||
static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask)
|
||||
{
|
||||
struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
|
||||
int rqh_cnt = min(RES_NR_HELD, rqh->cnt);
|
||||
@@ -142,7 +140,7 @@ static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
|
||||
* Let's ensure to break out of this loop if the lock is available for
|
||||
* us to potentially acquire.
|
||||
*/
|
||||
if (is_lock_released(lock, mask, ts))
|
||||
if (is_lock_released(lock, mask))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@@ -198,33 +196,21 @@ static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline int check_deadlock(rqspinlock_t *lock, u32 mask,
|
||||
struct rqspinlock_timeout *ts)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = check_deadlock_AA(lock, mask, ts);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = check_deadlock_ABBA(lock, mask, ts);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
|
||||
struct rqspinlock_timeout *ts)
|
||||
{
|
||||
u64 time = ktime_get_mono_fast_ns();
|
||||
u64 prev = ts->cur;
|
||||
u64 time;
|
||||
|
||||
if (!ts->timeout_end) {
|
||||
ts->cur = time;
|
||||
ts->timeout_end = time + ts->duration;
|
||||
if (check_deadlock_AA(lock))
|
||||
return -EDEADLK;
|
||||
ts->cur = ktime_get_mono_fast_ns();
|
||||
ts->timeout_end = ts->cur + ts->duration;
|
||||
return 0;
|
||||
}
|
||||
|
||||
time = ktime_get_mono_fast_ns();
|
||||
if (time > ts->timeout_end)
|
||||
return -ETIMEDOUT;
|
||||
|
||||
@@ -234,7 +220,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
|
||||
*/
|
||||
if (prev + NSEC_PER_MSEC < time) {
|
||||
ts->cur = time;
|
||||
return check_deadlock(lock, mask, ts);
|
||||
return check_deadlock_ABBA(lock, mask);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -278,6 +264,10 @@ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock)
|
||||
int val, ret = 0;
|
||||
|
||||
RES_INIT_TIMEOUT(ts);
|
||||
/*
|
||||
* The fast path is not invoked for the TAS fallback, so we must grab
|
||||
* the deadlock detection entry here.
|
||||
*/
|
||||
grab_held_lock_entry(lock);
|
||||
|
||||
/*
|
||||
@@ -400,10 +390,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
|
||||
goto queue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Grab an entry in the held locks array, to enable deadlock detection.
|
||||
*/
|
||||
grab_held_lock_entry(lock);
|
||||
/* Deadlock detection entry already held after failing fast path. */
|
||||
|
||||
/*
|
||||
* We're pending, wait for the owner to go away.
|
||||
@@ -450,12 +437,21 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
|
||||
* queuing.
|
||||
*/
|
||||
queue:
|
||||
lockevent_inc(lock_slowpath);
|
||||
/*
|
||||
* Grab deadlock detection entry for the queue path.
|
||||
* Do not queue if we're a waiter and someone is attempting this lock on
|
||||
* the same CPU. In case of NMIs, this prevents long timeouts where we
|
||||
* interrupt the pending waiter, and the owner, that will eventually
|
||||
* signal the head of our queue, both of which are logically but not
|
||||
* physically part of the queue, hence outside the scope of the idx > 0
|
||||
* check above for the trylock fallback.
|
||||
*/
|
||||
grab_held_lock_entry(lock);
|
||||
if (check_deadlock_AA(lock)) {
|
||||
ret = -EDEADLK;
|
||||
goto err_release_entry;
|
||||
}
|
||||
|
||||
lockevent_inc(lock_slowpath);
|
||||
/* Deadlock detection entry already held after failing fast path. */
|
||||
node = this_cpu_ptr(&rqnodes[0].mcs);
|
||||
idx = node->count++;
|
||||
tail = encode_tail(smp_processor_id(), idx);
|
||||
@@ -467,19 +463,17 @@ queue:
|
||||
* not be nested NMIs taking spinlocks. That may not be true in
|
||||
* some architectures even though the chance of needing more than
|
||||
* 4 nodes will still be extremely unlikely. When that happens,
|
||||
* we fall back to spinning on the lock directly without using
|
||||
* any MCS node. This is not the most elegant solution, but is
|
||||
* simple enough.
|
||||
* we fall back to attempting a trylock operation without using
|
||||
* any MCS node. Unlike qspinlock which cannot fail, we have the
|
||||
* option of failing the slow path, and under contention, such a
|
||||
* trylock spinning will likely be treated unfairly due to lack of
|
||||
* queueing, hence do not spin.
|
||||
*/
|
||||
if (unlikely(idx >= _Q_MAX_NODES || in_nmi())) {
|
||||
if (unlikely(idx >= _Q_MAX_NODES || (in_nmi() && idx > 0))) {
|
||||
lockevent_inc(lock_no_node);
|
||||
RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
|
||||
while (!queued_spin_trylock(lock)) {
|
||||
if (RES_CHECK_TIMEOUT(ts, ret, ~0u)) {
|
||||
lockevent_inc(rqspinlock_lock_timeout);
|
||||
goto err_release_node;
|
||||
}
|
||||
cpu_relax();
|
||||
if (!queued_spin_trylock(lock)) {
|
||||
ret = -EDEADLK;
|
||||
goto err_release_node;
|
||||
}
|
||||
goto release;
|
||||
}
|
||||
@@ -540,7 +534,7 @@ queue:
|
||||
|
||||
val = arch_mcs_spin_lock_contended(&node->locked);
|
||||
if (val == RES_TIMEOUT_VAL) {
|
||||
ret = -EDEADLK;
|
||||
ret = -ETIMEDOUT;
|
||||
goto waitq_timeout;
|
||||
}
|
||||
|
||||
@@ -575,6 +569,14 @@ queue:
|
||||
val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) ||
|
||||
RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK));
|
||||
|
||||
/* Disable queue destruction when we detect deadlocks. */
|
||||
if (ret == -EDEADLK) {
|
||||
if (!next)
|
||||
next = smp_cond_load_relaxed(&node->next, (VAL));
|
||||
arch_mcs_spin_unlock_contended(&next->locked);
|
||||
goto err_release_node;
|
||||
}
|
||||
|
||||
waitq_timeout:
|
||||
if (ret) {
|
||||
/*
|
||||
|
||||
@@ -42,6 +42,28 @@ static inline int stack_map_data_size(struct bpf_map *map)
|
||||
sizeof(struct bpf_stack_build_id) : sizeof(u64);
|
||||
}
|
||||
|
||||
/**
|
||||
* stack_map_calculate_max_depth - Calculate maximum allowed stack trace depth
|
||||
* @size: Size of the buffer/map value in bytes
|
||||
* @elem_size: Size of each stack trace element
|
||||
* @flags: BPF stack trace flags (BPF_F_USER_STACK, BPF_F_USER_BUILD_ID, ...)
|
||||
*
|
||||
* Return: Maximum number of stack trace entries that can be safely stored
|
||||
*/
|
||||
static u32 stack_map_calculate_max_depth(u32 size, u32 elem_size, u64 flags)
|
||||
{
|
||||
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
|
||||
u32 max_depth;
|
||||
u32 curr_sysctl_max_stack = READ_ONCE(sysctl_perf_event_max_stack);
|
||||
|
||||
max_depth = size / elem_size;
|
||||
max_depth += skip;
|
||||
if (max_depth > curr_sysctl_max_stack)
|
||||
return curr_sysctl_max_stack;
|
||||
|
||||
return max_depth;
|
||||
}
|
||||
|
||||
static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
|
||||
{
|
||||
u64 elem_size = sizeof(struct stack_map_bucket) +
|
||||
@@ -229,8 +251,8 @@ static long __bpf_get_stackid(struct bpf_map *map,
|
||||
{
|
||||
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
|
||||
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
|
||||
u32 hash, id, trace_nr, trace_len, i, max_depth;
|
||||
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
|
||||
u32 hash, id, trace_nr, trace_len, i;
|
||||
bool user = flags & BPF_F_USER_STACK;
|
||||
u64 *ips;
|
||||
bool hash_matches;
|
||||
@@ -239,7 +261,8 @@ static long __bpf_get_stackid(struct bpf_map *map,
|
||||
/* skipping more than usable stack trace */
|
||||
return -EFAULT;
|
||||
|
||||
trace_nr = trace->nr - skip;
|
||||
max_depth = stack_map_calculate_max_depth(map->value_size, stack_map_data_size(map), flags);
|
||||
trace_nr = min_t(u32, trace->nr - skip, max_depth - skip);
|
||||
trace_len = trace_nr * sizeof(u64);
|
||||
ips = trace->ip + skip;
|
||||
hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
|
||||
@@ -300,20 +323,17 @@ static long __bpf_get_stackid(struct bpf_map *map,
|
||||
BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
|
||||
u64, flags)
|
||||
{
|
||||
u32 max_depth = map->value_size / stack_map_data_size(map);
|
||||
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
|
||||
u32 elem_size = stack_map_data_size(map);
|
||||
bool user = flags & BPF_F_USER_STACK;
|
||||
struct perf_callchain_entry *trace;
|
||||
bool kernel = !user;
|
||||
u32 max_depth;
|
||||
|
||||
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
|
||||
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
|
||||
return -EINVAL;
|
||||
|
||||
max_depth += skip;
|
||||
if (max_depth > sysctl_perf_event_max_stack)
|
||||
max_depth = sysctl_perf_event_max_stack;
|
||||
|
||||
max_depth = stack_map_calculate_max_depth(map->value_size, elem_size, flags);
|
||||
trace = get_perf_callchain(regs, kernel, user, max_depth,
|
||||
false, false, 0);
|
||||
|
||||
@@ -371,15 +391,11 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
|
||||
return -EFAULT;
|
||||
|
||||
nr_kernel = count_kernel_ip(trace);
|
||||
__u64 nr = trace->nr; /* save original */
|
||||
|
||||
if (kernel) {
|
||||
__u64 nr = trace->nr;
|
||||
|
||||
trace->nr = nr_kernel;
|
||||
ret = __bpf_get_stackid(map, trace, flags);
|
||||
|
||||
/* restore nr */
|
||||
trace->nr = nr;
|
||||
} else { /* user */
|
||||
u64 skip = flags & BPF_F_SKIP_FIELD_MASK;
|
||||
|
||||
@@ -390,6 +406,10 @@ BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
|
||||
flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
|
||||
ret = __bpf_get_stackid(map, trace, flags);
|
||||
}
|
||||
|
||||
/* restore nr */
|
||||
trace->nr = nr;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -406,7 +426,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
|
||||
struct perf_callchain_entry *trace_in,
|
||||
void *buf, u32 size, u64 flags, bool may_fault)
|
||||
{
|
||||
u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
|
||||
u32 trace_nr, copy_len, elem_size, max_depth;
|
||||
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
|
||||
bool crosstask = task && task != current;
|
||||
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
|
||||
@@ -438,21 +458,20 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
|
||||
goto clear;
|
||||
}
|
||||
|
||||
num_elem = size / elem_size;
|
||||
max_depth = num_elem + skip;
|
||||
if (sysctl_perf_event_max_stack < max_depth)
|
||||
max_depth = sysctl_perf_event_max_stack;
|
||||
max_depth = stack_map_calculate_max_depth(size, elem_size, flags);
|
||||
|
||||
if (may_fault)
|
||||
rcu_read_lock(); /* need RCU for perf's callchain below */
|
||||
|
||||
if (trace_in)
|
||||
if (trace_in) {
|
||||
trace = trace_in;
|
||||
else if (kernel && task)
|
||||
trace->nr = min_t(u32, trace->nr, max_depth);
|
||||
} else if (kernel && task) {
|
||||
trace = get_callchain_entry_for_task(task, max_depth);
|
||||
else
|
||||
} else {
|
||||
trace = get_perf_callchain(regs, kernel, user, max_depth,
|
||||
crosstask, false, 0);
|
||||
}
|
||||
|
||||
if (unlikely(!trace) || trace->nr < skip) {
|
||||
if (may_fault)
|
||||
@@ -461,7 +480,6 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
|
||||
}
|
||||
|
||||
trace_nr = trace->nr - skip;
|
||||
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
|
||||
copy_len = trace_nr * elem_size;
|
||||
|
||||
ips = trace->ip + skip;
|
||||
|
||||
@@ -4,111 +4,10 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/bpf_mem_alloc.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/memory.h>
|
||||
#include <linux/local_lock.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
/*
|
||||
* Simple per-CPU NMI-safe bump allocation mechanism, backed by the NMI-safe
|
||||
* try_alloc_pages()/free_pages_nolock() primitives. We allocate a page and
|
||||
* stash it in a local per-CPU variable, and bump allocate from the page
|
||||
* whenever items need to be printed to a stream. Each page holds a global
|
||||
* atomic refcount in its first 4 bytes, and then records of variable length
|
||||
* that describe the printed messages. Once the global refcount has dropped to
|
||||
* zero, it is a signal to free the page back to the kernel's page allocator,
|
||||
* given all the individual records in it have been consumed.
|
||||
*
|
||||
* It is possible the same page is used to serve allocations across different
|
||||
* programs, which may be consumed at different times individually, hence
|
||||
* maintaining a reference count per-page is critical for correct lifetime
|
||||
* tracking.
|
||||
*
|
||||
* The bpf_stream_page code will be replaced to use kmalloc_nolock() once it
|
||||
* lands.
|
||||
*/
|
||||
struct bpf_stream_page {
|
||||
refcount_t ref;
|
||||
u32 consumed;
|
||||
char buf[];
|
||||
};
|
||||
|
||||
/* Available room to add data to a refcounted page. */
|
||||
#define BPF_STREAM_PAGE_SZ (PAGE_SIZE - offsetofend(struct bpf_stream_page, consumed))
|
||||
|
||||
static DEFINE_PER_CPU(local_trylock_t, stream_local_lock) = INIT_LOCAL_TRYLOCK(stream_local_lock);
|
||||
static DEFINE_PER_CPU(struct bpf_stream_page *, stream_pcpu_page);
|
||||
|
||||
static bool bpf_stream_page_local_lock(unsigned long *flags)
|
||||
{
|
||||
return local_trylock_irqsave(&stream_local_lock, *flags);
|
||||
}
|
||||
|
||||
static void bpf_stream_page_local_unlock(unsigned long *flags)
|
||||
{
|
||||
local_unlock_irqrestore(&stream_local_lock, *flags);
|
||||
}
|
||||
|
||||
static void bpf_stream_page_free(struct bpf_stream_page *stream_page)
|
||||
{
|
||||
struct page *p;
|
||||
|
||||
if (!stream_page)
|
||||
return;
|
||||
p = virt_to_page(stream_page);
|
||||
free_pages_nolock(p, 0);
|
||||
}
|
||||
|
||||
static void bpf_stream_page_get(struct bpf_stream_page *stream_page)
|
||||
{
|
||||
refcount_inc(&stream_page->ref);
|
||||
}
|
||||
|
||||
static void bpf_stream_page_put(struct bpf_stream_page *stream_page)
|
||||
{
|
||||
if (refcount_dec_and_test(&stream_page->ref))
|
||||
bpf_stream_page_free(stream_page);
|
||||
}
|
||||
|
||||
static void bpf_stream_page_init(struct bpf_stream_page *stream_page)
|
||||
{
|
||||
refcount_set(&stream_page->ref, 1);
|
||||
stream_page->consumed = 0;
|
||||
}
|
||||
|
||||
static struct bpf_stream_page *bpf_stream_page_replace(void)
|
||||
{
|
||||
struct bpf_stream_page *stream_page, *old_stream_page;
|
||||
struct page *page;
|
||||
|
||||
page = alloc_pages_nolock(/* Don't account */ 0, NUMA_NO_NODE, 0);
|
||||
if (!page)
|
||||
return NULL;
|
||||
stream_page = page_address(page);
|
||||
bpf_stream_page_init(stream_page);
|
||||
|
||||
old_stream_page = this_cpu_read(stream_pcpu_page);
|
||||
if (old_stream_page)
|
||||
bpf_stream_page_put(old_stream_page);
|
||||
this_cpu_write(stream_pcpu_page, stream_page);
|
||||
return stream_page;
|
||||
}
|
||||
|
||||
static int bpf_stream_page_check_room(struct bpf_stream_page *stream_page, int len)
|
||||
{
|
||||
int min = offsetof(struct bpf_stream_elem, str[0]);
|
||||
int consumed = stream_page->consumed;
|
||||
int total = BPF_STREAM_PAGE_SZ;
|
||||
int rem = max(0, total - consumed - min);
|
||||
|
||||
/* Let's give room of at least 8 bytes. */
|
||||
WARN_ON_ONCE(rem % 8 != 0);
|
||||
rem = rem < 8 ? 0 : rem;
|
||||
return min(len, rem);
|
||||
}
|
||||
|
||||
static void bpf_stream_elem_init(struct bpf_stream_elem *elem, int len)
|
||||
{
|
||||
init_llist_node(&elem->node);
|
||||
@@ -116,54 +15,12 @@ static void bpf_stream_elem_init(struct bpf_stream_elem *elem, int len)
|
||||
elem->consumed_len = 0;
|
||||
}
|
||||
|
||||
static struct bpf_stream_page *bpf_stream_page_from_elem(struct bpf_stream_elem *elem)
|
||||
{
|
||||
unsigned long addr = (unsigned long)elem;
|
||||
|
||||
return (struct bpf_stream_page *)PAGE_ALIGN_DOWN(addr);
|
||||
}
|
||||
|
||||
static struct bpf_stream_elem *bpf_stream_page_push_elem(struct bpf_stream_page *stream_page, int len)
|
||||
{
|
||||
u32 consumed = stream_page->consumed;
|
||||
|
||||
stream_page->consumed += round_up(offsetof(struct bpf_stream_elem, str[len]), 8);
|
||||
return (struct bpf_stream_elem *)&stream_page->buf[consumed];
|
||||
}
|
||||
|
||||
static struct bpf_stream_elem *bpf_stream_page_reserve_elem(int len)
|
||||
{
|
||||
struct bpf_stream_elem *elem = NULL;
|
||||
struct bpf_stream_page *page;
|
||||
int room = 0;
|
||||
|
||||
page = this_cpu_read(stream_pcpu_page);
|
||||
if (!page)
|
||||
page = bpf_stream_page_replace();
|
||||
if (!page)
|
||||
return NULL;
|
||||
|
||||
room = bpf_stream_page_check_room(page, len);
|
||||
if (room != len)
|
||||
page = bpf_stream_page_replace();
|
||||
if (!page)
|
||||
return NULL;
|
||||
bpf_stream_page_get(page);
|
||||
room = bpf_stream_page_check_room(page, len);
|
||||
WARN_ON_ONCE(room != len);
|
||||
|
||||
elem = bpf_stream_page_push_elem(page, room);
|
||||
bpf_stream_elem_init(elem, room);
|
||||
return elem;
|
||||
}
|
||||
|
||||
static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
|
||||
{
|
||||
const int max_len = ARRAY_SIZE((struct bpf_bprintf_buffers){}.buf);
|
||||
struct bpf_stream_elem *elem;
|
||||
unsigned long flags;
|
||||
size_t alloc_size;
|
||||
|
||||
BUILD_BUG_ON(max_len > BPF_STREAM_PAGE_SZ);
|
||||
/*
|
||||
* Length denotes the amount of data to be written as part of stream element,
|
||||
* thus includes '\0' byte. We're capped by how much bpf_bprintf_buffers can
|
||||
@@ -172,10 +29,13 @@ static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
|
||||
if (len < 0 || len > max_len)
|
||||
return NULL;
|
||||
|
||||
if (!bpf_stream_page_local_lock(&flags))
|
||||
alloc_size = offsetof(struct bpf_stream_elem, str[len]);
|
||||
elem = kmalloc_nolock(alloc_size, __GFP_ZERO, -1);
|
||||
if (!elem)
|
||||
return NULL;
|
||||
elem = bpf_stream_page_reserve_elem(len);
|
||||
bpf_stream_page_local_unlock(&flags);
|
||||
|
||||
bpf_stream_elem_init(elem, len);
|
||||
|
||||
return elem;
|
||||
}
|
||||
|
||||
@@ -231,10 +91,7 @@ static struct bpf_stream *bpf_stream_get(enum bpf_stream_id stream_id, struct bp
|
||||
|
||||
static void bpf_stream_free_elem(struct bpf_stream_elem *elem)
|
||||
{
|
||||
struct bpf_stream_page *p;
|
||||
|
||||
p = bpf_stream_page_from_elem(elem);
|
||||
bpf_stream_page_put(p);
|
||||
kfree_nolock(elem);
|
||||
}
|
||||
|
||||
static void bpf_stream_free_list(struct llist_node *list)
|
||||
|
||||
@@ -158,7 +158,7 @@ static void maybe_wait_bpf_programs(struct bpf_map *map)
|
||||
*/
|
||||
if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
|
||||
map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
|
||||
synchronize_rcu();
|
||||
synchronize_rcu_expedited();
|
||||
}
|
||||
|
||||
static void unpin_uptr_kaddr(void *kaddr)
|
||||
@@ -1493,6 +1493,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
|
||||
case BPF_MAP_TYPE_STRUCT_OPS:
|
||||
case BPF_MAP_TYPE_CPUMAP:
|
||||
case BPF_MAP_TYPE_ARENA:
|
||||
case BPF_MAP_TYPE_INSN_ARRAY:
|
||||
if (!bpf_token_capable(token, CAP_BPF))
|
||||
goto put_token;
|
||||
break;
|
||||
@@ -1585,7 +1586,8 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr)
|
||||
goto free_map;
|
||||
}
|
||||
} else if (attr->excl_prog_hash_size) {
|
||||
return -EINVAL;
|
||||
err = -EINVAL;
|
||||
goto free_map;
|
||||
}
|
||||
|
||||
err = security_bpf_map_create(map, attr, token, uattr.is_kernel);
|
||||
@@ -1724,9 +1726,6 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->flags & ~BPF_F_LOCK)
|
||||
return -EINVAL;
|
||||
|
||||
CLASS(fd, f)(attr->map_fd);
|
||||
map = __bpf_map_get(f);
|
||||
if (IS_ERR(map))
|
||||
@@ -1734,9 +1733,9 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ))
|
||||
return -EPERM;
|
||||
|
||||
if ((attr->flags & BPF_F_LOCK) &&
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK))
|
||||
return -EINVAL;
|
||||
err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
key = __bpf_copy_key(ukey, map->key_size);
|
||||
if (IS_ERR(key))
|
||||
@@ -1799,11 +1798,9 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
if ((attr->flags & BPF_F_LOCK) &&
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
|
||||
err = -EINVAL;
|
||||
err = bpf_map_check_op_flags(map, attr->flags, ~0);
|
||||
if (err)
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
key = ___bpf_copy_key(ukey, map->key_size);
|
||||
if (IS_ERR(key)) {
|
||||
@@ -2007,13 +2004,9 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
|
||||
void *key, *value;
|
||||
int err = 0;
|
||||
|
||||
if (attr->batch.elem_flags & ~BPF_F_LOCK)
|
||||
return -EINVAL;
|
||||
|
||||
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
value_size = bpf_map_value_size(map);
|
||||
|
||||
@@ -2070,12 +2063,9 @@ int generic_map_lookup_batch(struct bpf_map *map,
|
||||
u32 value_size, cp, max_count;
|
||||
int err;
|
||||
|
||||
if (attr->batch.elem_flags & ~BPF_F_LOCK)
|
||||
return -EINVAL;
|
||||
|
||||
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
|
||||
!btf_record_has_field(map->record, BPF_SPIN_LOCK))
|
||||
return -EINVAL;
|
||||
err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
value_size = bpf_map_value_size(map);
|
||||
|
||||
@@ -2462,6 +2452,9 @@ void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog)
|
||||
struct bpf_prog_stats *stats;
|
||||
unsigned int flags;
|
||||
|
||||
if (unlikely(!prog->stats))
|
||||
return;
|
||||
|
||||
stats = this_cpu_ptr(prog->stats);
|
||||
flags = u64_stats_update_begin_irqsave(&stats->syncp);
|
||||
u64_stats_inc(&stats->misses);
|
||||
@@ -2853,6 +2846,23 @@ static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr
|
||||
return err;
|
||||
}
|
||||
|
||||
static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog)
|
||||
{
|
||||
int err;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < prog->aux->used_map_cnt; i++) {
|
||||
if (prog->aux->used_maps[i]->map_type != BPF_MAP_TYPE_INSN_ARRAY)
|
||||
continue;
|
||||
|
||||
err = bpf_insn_array_ready(prog->aux->used_maps[i]);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* last field in 'union bpf_attr' used by this command */
|
||||
#define BPF_PROG_LOAD_LAST_FIELD keyring_id
|
||||
|
||||
@@ -3082,6 +3092,10 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
|
||||
if (err < 0)
|
||||
goto free_used_maps;
|
||||
|
||||
err = bpf_prog_mark_insn_arrays_ready(prog);
|
||||
if (err < 0)
|
||||
goto free_used_maps;
|
||||
|
||||
err = bpf_prog_alloc_id(prog);
|
||||
if (err)
|
||||
goto free_used_maps;
|
||||
@@ -5034,19 +5048,19 @@ static int bpf_prog_get_info_by_fd(struct file *file,
|
||||
struct bpf_insn *insns_sanitized;
|
||||
bool fault;
|
||||
|
||||
if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
|
||||
if (!prog->blinded || bpf_dump_raw_ok(file->f_cred)) {
|
||||
insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
|
||||
if (!insns_sanitized)
|
||||
return -ENOMEM;
|
||||
uinsns = u64_to_user_ptr(info.xlated_prog_insns);
|
||||
ulen = min_t(u32, info.xlated_prog_len, ulen);
|
||||
fault = copy_to_user(uinsns, insns_sanitized, ulen);
|
||||
kfree(insns_sanitized);
|
||||
if (fault)
|
||||
return -EFAULT;
|
||||
} else {
|
||||
info.xlated_prog_insns = 0;
|
||||
goto done;
|
||||
}
|
||||
insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
|
||||
if (!insns_sanitized)
|
||||
return -ENOMEM;
|
||||
uinsns = u64_to_user_ptr(info.xlated_prog_insns);
|
||||
ulen = min_t(u32, info.xlated_prog_len, ulen);
|
||||
fault = copy_to_user(uinsns, insns_sanitized, ulen);
|
||||
kfree(insns_sanitized);
|
||||
if (fault)
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (bpf_prog_is_offloaded(prog->aux)) {
|
||||
|
||||
@@ -175,23 +175,42 @@ out:
|
||||
return tr;
|
||||
}
|
||||
|
||||
static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
|
||||
static int bpf_trampoline_update_fentry(struct bpf_trampoline *tr, u32 orig_flags,
|
||||
void *old_addr, void *new_addr)
|
||||
{
|
||||
enum bpf_text_poke_type new_t = BPF_MOD_CALL, old_t = BPF_MOD_CALL;
|
||||
void *ip = tr->func.addr;
|
||||
|
||||
if (!new_addr)
|
||||
new_t = BPF_MOD_NOP;
|
||||
else if (bpf_trampoline_use_jmp(tr->flags))
|
||||
new_t = BPF_MOD_JUMP;
|
||||
|
||||
if (!old_addr)
|
||||
old_t = BPF_MOD_NOP;
|
||||
else if (bpf_trampoline_use_jmp(orig_flags))
|
||||
old_t = BPF_MOD_JUMP;
|
||||
|
||||
return bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
|
||||
}
|
||||
|
||||
static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
|
||||
void *old_addr)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (tr->func.ftrace_managed)
|
||||
ret = unregister_ftrace_direct(tr->fops, (long)old_addr, false);
|
||||
else
|
||||
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
|
||||
ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr, NULL);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
|
||||
static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
|
||||
void *old_addr, void *new_addr,
|
||||
bool lock_direct_mutex)
|
||||
{
|
||||
void *ip = tr->func.addr;
|
||||
int ret;
|
||||
|
||||
if (tr->func.ftrace_managed) {
|
||||
@@ -200,7 +219,8 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad
|
||||
else
|
||||
ret = modify_ftrace_direct_nolock(tr->fops, (long)new_addr);
|
||||
} else {
|
||||
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
|
||||
ret = bpf_trampoline_update_fentry(tr, orig_flags, old_addr,
|
||||
new_addr);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@@ -220,10 +240,12 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
|
||||
}
|
||||
|
||||
if (tr->func.ftrace_managed) {
|
||||
ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
|
||||
ret = ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = register_ftrace_direct(tr->fops, (long)new_addr);
|
||||
} else {
|
||||
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
|
||||
ret = bpf_trampoline_update_fentry(tr, 0, NULL, new_addr);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -334,8 +356,9 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im)
|
||||
* call_rcu_tasks() is not necessary.
|
||||
*/
|
||||
if (im->ip_after_call) {
|
||||
int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
|
||||
NULL, im->ip_epilogue);
|
||||
int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_NOP,
|
||||
BPF_MOD_JUMP, NULL,
|
||||
im->ip_epilogue);
|
||||
WARN_ON(err);
|
||||
if (IS_ENABLED(CONFIG_TASKS_RCU))
|
||||
call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
|
||||
@@ -408,7 +431,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
|
||||
return PTR_ERR(tlinks);
|
||||
|
||||
if (total == 0) {
|
||||
err = unregister_fentry(tr, tr->cur_image->image);
|
||||
err = unregister_fentry(tr, orig_flags, tr->cur_image->image);
|
||||
bpf_tramp_image_put(tr->cur_image);
|
||||
tr->cur_image = NULL;
|
||||
goto out;
|
||||
@@ -432,9 +455,20 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
|
||||
again:
|
||||
if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
|
||||
(tr->flags & BPF_TRAMP_F_CALL_ORIG))
|
||||
tr->flags |= BPF_TRAMP_F_ORIG_STACK;
|
||||
if (tr->flags & BPF_TRAMP_F_CALL_ORIG) {
|
||||
if (tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) {
|
||||
/* The BPF_TRAMP_F_SKIP_FRAME can be cleared in the
|
||||
* first try, reset it in the second try.
|
||||
*/
|
||||
tr->flags |= BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SKIP_FRAME;
|
||||
} else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_JMP)) {
|
||||
/* Use "jmp" instead of "call" for the trampoline
|
||||
* in the origin call case, and we don't need to
|
||||
* skip the frame.
|
||||
*/
|
||||
tr->flags &= ~BPF_TRAMP_F_SKIP_FRAME;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
|
||||
@@ -465,10 +499,18 @@ again:
|
||||
if (err)
|
||||
goto out_free;
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
|
||||
if (bpf_trampoline_use_jmp(tr->flags))
|
||||
tr->fops->flags |= FTRACE_OPS_FL_JMP;
|
||||
else
|
||||
tr->fops->flags &= ~FTRACE_OPS_FL_JMP;
|
||||
#endif
|
||||
|
||||
WARN_ON(tr->cur_image && total == 0);
|
||||
if (tr->cur_image)
|
||||
/* progs already running at this address */
|
||||
err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
|
||||
err = modify_fentry(tr, orig_flags, tr->cur_image->image,
|
||||
im->image, lock_direct_mutex);
|
||||
else
|
||||
/* first time registering */
|
||||
err = register_fentry(tr, im->image);
|
||||
@@ -491,8 +533,15 @@ again:
|
||||
tr->cur_image = im;
|
||||
out:
|
||||
/* If any error happens, restore previous flags */
|
||||
if (err)
|
||||
if (err) {
|
||||
tr->flags = orig_flags;
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
|
||||
if (bpf_trampoline_use_jmp(tr->flags))
|
||||
tr->fops->flags |= FTRACE_OPS_FL_JMP;
|
||||
else
|
||||
tr->fops->flags &= ~FTRACE_OPS_FL_JMP;
|
||||
#endif
|
||||
}
|
||||
kfree(tlinks);
|
||||
return err;
|
||||
|
||||
@@ -568,7 +617,8 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
|
||||
if (err)
|
||||
return err;
|
||||
tr->extension_prog = link->link.prog;
|
||||
return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
|
||||
return bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP,
|
||||
BPF_MOD_JUMP, NULL,
|
||||
link->link.prog->bpf_func);
|
||||
}
|
||||
if (cnt >= BPF_MAX_TRAMP_LINKS)
|
||||
@@ -616,6 +666,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
|
||||
if (kind == BPF_TRAMP_REPLACE) {
|
||||
WARN_ON_ONCE(!tr->extension_prog);
|
||||
err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
|
||||
BPF_MOD_NOP,
|
||||
tr->extension_prog->bpf_func, NULL);
|
||||
tr->extension_prog = NULL;
|
||||
guard(mutex)(&tgt_prog->aux->ext_mutex);
|
||||
|
||||
File diff suppressed because it is too large
@@ -80,6 +80,12 @@ config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
	  If the architecture generates __patchable_function_entries sections
	  but does not want them included in the ftrace locations.

config HAVE_DYNAMIC_FTRACE_WITH_JMP
	bool
	help
	  If the architecture supports to replace the __fentry__ with a
	  "jmp" instruction.

config HAVE_SYSCALL_TRACEPOINTS
	bool
	help
@@ -330,6 +336,12 @@ config DYNAMIC_FTRACE_WITH_ARGS
	depends on DYNAMIC_FTRACE
	depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS

config DYNAMIC_FTRACE_WITH_JMP
	def_bool y
	depends on DYNAMIC_FTRACE
	depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS
	depends on HAVE_DYNAMIC_FTRACE_WITH_JMP

config FPROBE
	bool "Kernel Function Probe (fprobe)"
	depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC

@@ -2529,7 +2529,7 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
|
||||
return run_ctx->entry_ip;
|
||||
}
|
||||
|
||||
static int
|
||||
static __always_inline int
|
||||
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
|
||||
unsigned long entry_ip, struct ftrace_regs *fregs,
|
||||
bool is_return, void *data)
|
||||
@@ -3372,13 +3372,13 @@ typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struc
|
||||
* direct calls into all the specific callback implementations
|
||||
* (copy_user_data_sleepable, copy_user_data_nofault, and so on)
|
||||
*/
|
||||
static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size,
|
||||
static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size,
|
||||
const void *unsafe_src,
|
||||
copy_fn_t str_copy_fn,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
struct bpf_dynptr_kern *dst;
|
||||
u32 chunk_sz, off;
|
||||
u64 chunk_sz, off;
|
||||
void *dst_slice;
|
||||
int cnt, err;
|
||||
char buf[256];
|
||||
@@ -3392,7 +3392,7 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do
|
||||
return -E2BIG;
|
||||
|
||||
for (off = 0; off < size; off += chunk_sz - 1) {
|
||||
chunk_sz = min_t(u32, sizeof(buf), size - off);
|
||||
chunk_sz = min_t(u64, sizeof(buf), size - off);
|
||||
/* Expect str_copy_fn to return count of copied bytes, including
|
||||
* zero terminator. Next iteration increment off by chunk_sz - 1 to
|
||||
* overwrite NUL.
|
||||
@@ -3409,14 +3409,14 @@ static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 do
|
||||
return off;
|
||||
}
|
||||
|
||||
static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff,
|
||||
u32 size, const void *unsafe_src,
|
||||
static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff,
|
||||
u64 size, const void *unsafe_src,
|
||||
copy_fn_t copy_fn, struct task_struct *tsk)
|
||||
{
|
||||
struct bpf_dynptr_kern *dst;
|
||||
void *dst_slice;
|
||||
char buf[256];
|
||||
u32 off, chunk_sz;
|
||||
u64 off, chunk_sz;
|
||||
int err;
|
||||
|
||||
dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
|
||||
@@ -3428,7 +3428,7 @@ static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32
|
||||
return -E2BIG;
|
||||
|
||||
for (off = 0; off < size; off += chunk_sz) {
|
||||
chunk_sz = min_t(u32, sizeof(buf), size - off);
|
||||
chunk_sz = min_t(u64, sizeof(buf), size - off);
|
||||
err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -3514,58 +3514,58 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid
|
||||
return bpf_send_signal_common(sig, type, task, value);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off,
|
||||
u32 size, const void __user *unsafe_ptr__ign)
|
||||
__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off,
|
||||
u64 size, const void __user *unsafe_ptr__ign)
|
||||
{
|
||||
return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
|
||||
copy_user_data_nofault, NULL);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off,
|
||||
u32 size, const void *unsafe_ptr__ign)
|
||||
__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off,
|
||||
u64 size, const void *unsafe_ptr__ign)
|
||||
{
|
||||
return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
|
||||
copy_kernel_data_nofault, NULL);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
|
||||
u32 size, const void __user *unsafe_ptr__ign)
|
||||
__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
|
||||
u64 size, const void __user *unsafe_ptr__ign)
|
||||
{
|
||||
return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
|
||||
copy_user_str_nofault, NULL);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off,
|
||||
u32 size, const void *unsafe_ptr__ign)
|
||||
__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off,
|
||||
u64 size, const void *unsafe_ptr__ign)
|
||||
{
|
||||
return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
|
||||
copy_kernel_str_nofault, NULL);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off,
|
||||
u32 size, const void __user *unsafe_ptr__ign)
|
||||
__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off,
|
||||
u64 size, const void __user *unsafe_ptr__ign)
|
||||
{
|
||||
return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
|
||||
copy_user_data_sleepable, NULL);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
|
||||
u32 size, const void __user *unsafe_ptr__ign)
|
||||
__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
|
||||
u64 size, const void __user *unsafe_ptr__ign)
|
||||
{
|
||||
return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
|
||||
copy_user_str_sleepable, NULL);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off,
|
||||
u32 size, const void __user *unsafe_ptr__ign,
|
||||
__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off,
|
||||
u64 size, const void __user *unsafe_ptr__ign,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
|
||||
copy_user_data_sleepable, tsk);
|
||||
}
|
||||
|
||||
__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off,
|
||||
u32 size, const void __user *unsafe_ptr__ign,
|
||||
__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off,
|
||||
u64 size, const void __user *unsafe_ptr__ign,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
|
||||
|
||||
@@ -5951,7 +5951,8 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
|
||||
for (i = 0; i < size; i++) {
|
||||
hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
|
||||
del = __ftrace_lookup_ip(direct_functions, entry->ip);
|
||||
if (del && del->direct == addr) {
|
||||
if (del && ftrace_jmp_get(del->direct) ==
|
||||
ftrace_jmp_get(addr)) {
|
||||
remove_hash_entry(direct_functions, del);
|
||||
kfree(del);
|
||||
}
|
||||
@@ -6016,8 +6017,15 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
|
||||
if (ftrace_hash_empty(hash))
|
||||
return -EINVAL;
|
||||
|
||||
/* This is a "raw" address, and this should never happen. */
|
||||
if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&direct_mutex);
|
||||
|
||||
if (ops->flags & FTRACE_OPS_FL_JMP)
|
||||
addr = ftrace_jmp_set(addr);
|
||||
|
||||
/* Make sure requested entries are not already registered.. */
|
||||
size = 1 << hash->size_bits;
|
||||
for (i = 0; i < size; i++) {
|
||||
@@ -6138,6 +6146,13 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
|
||||
|
||||
lockdep_assert_held_once(&direct_mutex);
|
||||
|
||||
/* This is a "raw" address, and this should never happen. */
|
||||
if (WARN_ON_ONCE(ftrace_is_jmp(addr)))
|
||||
return -EINVAL;
|
||||
|
||||
if (ops->flags & FTRACE_OPS_FL_JMP)
|
||||
addr = ftrace_jmp_set(addr);
|
||||
|
||||
/* Enable the tmp_ops to have the same functions as the direct ops */
|
||||
ftrace_ops_init(&tmp_ops);
|
||||
tmp_ops.func_hash = ops->func_hash;
|
||||
|
||||
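The FTRACE_OPS_FL_JMP handling above tags the trampoline address with ftrace_jmp_set() internally, so a caller only sets the flag on its ops. A minimal sketch of such a caller, assuming the register_ftrace_direct() and ftrace_set_filter_ip() signatures shown in the hunks above; the ops setup and helper name are illustrative, not code from this series:

/* Hypothetical caller: attach a direct trampoline that should be entered
 * with a jump instead of a call. Only the flag is new; the registration
 * API is the existing register_ftrace_direct(). */
static struct ftrace_ops my_jmp_ops = {
        .flags = FTRACE_OPS_FL_JMP,
};

static int attach_jmp_trampoline(unsigned long ip, unsigned long tramp)
{
        int err;

        err = ftrace_set_filter_ip(&my_jmp_ops, ip, 0, 0);
        if (err)
                return err;

        /* ftrace tags tramp via ftrace_jmp_set() because FL_JMP is set */
        return register_ftrace_direct(&my_jmp_ops, tramp);
}
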
@@ -11,27 +11,8 @@
|
||||
|
||||
#define MAX_PHDR_CNT 256
|
||||
|
||||
struct freader {
|
||||
void *buf;
|
||||
u32 buf_sz;
|
||||
int err;
|
||||
union {
|
||||
struct {
|
||||
struct file *file;
|
||||
struct folio *folio;
|
||||
void *addr;
|
||||
loff_t folio_off;
|
||||
bool may_fault;
|
||||
};
|
||||
struct {
|
||||
const char *data;
|
||||
u64 data_sz;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
|
||||
struct file *file, bool may_fault)
|
||||
void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
|
||||
struct file *file, bool may_fault)
|
||||
{
|
||||
memset(r, 0, sizeof(*r));
|
||||
r->buf = buf;
|
||||
@@ -40,7 +21,7 @@ static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
|
||||
r->may_fault = may_fault;
|
||||
}
|
||||
|
||||
static void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz)
|
||||
void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz)
|
||||
{
|
||||
memset(r, 0, sizeof(*r));
|
||||
r->data = data;
|
||||
@@ -92,7 +73,7 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
|
||||
const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
|
||||
{
|
||||
size_t folio_sz;
|
||||
|
||||
@@ -127,18 +108,21 @@ static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
|
||||
*/
|
||||
folio_sz = folio_size(r->folio);
|
||||
if (file_off + sz > r->folio_off + folio_sz) {
|
||||
int part_sz = r->folio_off + folio_sz - file_off;
|
||||
u64 part_sz = r->folio_off + folio_sz - file_off, off;
|
||||
|
||||
/* copy the part that resides in the current folio */
|
||||
memcpy(r->buf, r->addr + (file_off - r->folio_off), part_sz);
|
||||
memcpy(r->buf, r->addr + file_off - r->folio_off, part_sz);
|
||||
off = part_sz;
|
||||
|
||||
/* fetch next folio */
|
||||
r->err = freader_get_folio(r, r->folio_off + folio_sz);
|
||||
if (r->err)
|
||||
return NULL;
|
||||
|
||||
/* copy the rest of requested data */
|
||||
memcpy(r->buf + part_sz, r->addr, sz - part_sz);
|
||||
while (off < sz) {
|
||||
/* fetch next folio */
|
||||
r->err = freader_get_folio(r, r->folio_off + folio_sz);
|
||||
if (r->err)
|
||||
return NULL;
|
||||
folio_sz = folio_size(r->folio);
|
||||
part_sz = min_t(u64, sz - off, folio_sz);
|
||||
memcpy(r->buf + off, r->addr, part_sz);
|
||||
off += part_sz;
|
||||
}
|
||||
|
||||
return r->buf;
|
||||
}
|
||||
@@ -147,7 +131,7 @@ static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz)
|
||||
return r->addr + (file_off - r->folio_off);
|
||||
}
|
||||
|
||||
static void freader_cleanup(struct freader *r)
|
||||
void freader_cleanup(struct freader *r)
|
||||
{
|
||||
if (!r->buf)
|
||||
return; /* non-file-backed mode */
|
||||
|
||||
@@ -436,7 +436,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
|
||||
|
||||
static int bpf_test_finish(const union bpf_attr *kattr,
|
||||
union bpf_attr __user *uattr, const void *data,
|
||||
struct skb_shared_info *sinfo, u32 size,
|
||||
struct skb_shared_info *sinfo, u32 size, u32 frag_size,
|
||||
u32 retval, u32 duration)
|
||||
{
|
||||
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
|
||||
@@ -453,7 +453,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
|
||||
}
|
||||
|
||||
if (data_out) {
|
||||
int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
|
||||
int len = sinfo ? copy_size - frag_size : copy_size;
|
||||
|
||||
if (len < 0) {
|
||||
err = -ENOSPC;
|
||||
@@ -899,6 +899,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
|
||||
/* cb is allowed */
|
||||
|
||||
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
|
||||
offsetof(struct __sk_buff, data_end)))
|
||||
return -EINVAL;
|
||||
|
||||
/* data_end is allowed, but not copied to skb */
|
||||
|
||||
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, data_end),
|
||||
offsetof(struct __sk_buff, tstamp)))
|
||||
return -EINVAL;
|
||||
|
||||
@@ -939,6 +945,11 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
|
||||
|
||||
if (__skb->gso_segs > GSO_MAX_SEGS)
|
||||
return -EINVAL;
|
||||
|
||||
/* Currently GSO type is zero/unset. If this gets extended with
|
||||
* a small list of accepted GSO types in future, the filter for
|
||||
* an unset GSO type in bpf_clone_redirect() can be lifted.
|
||||
*/
|
||||
skb_shinfo(skb)->gso_segs = __skb->gso_segs;
|
||||
skb_shinfo(skb)->gso_size = __skb->gso_size;
|
||||
skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp;
|
||||
@@ -973,46 +984,39 @@ static struct proto bpf_dummy_proto = {
|
||||
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
bool is_l2 = false, is_direct_pkt_access = false;
|
||||
bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false;
|
||||
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
struct net *net = current->nsproxy->net_ns;
|
||||
struct net_device *dev = net->loopback_dev;
|
||||
u32 size = kattr->test.data_size_in;
|
||||
u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
|
||||
u32 linear_sz = kattr->test.data_size_in;
|
||||
u32 repeat = kattr->test.repeat;
|
||||
struct __sk_buff *ctx = NULL;
|
||||
struct sk_buff *skb = NULL;
|
||||
struct sock *sk = NULL;
|
||||
u32 retval, duration;
|
||||
int hh_len = ETH_HLEN;
|
||||
struct sk_buff *skb;
|
||||
struct sock *sk;
|
||||
void *data;
|
||||
void *data = NULL;
|
||||
int ret;
|
||||
|
||||
if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
|
||||
kattr->test.cpu || kattr->test.batch_size)
|
||||
return -EINVAL;
|
||||
|
||||
if (size < ETH_HLEN)
|
||||
if (kattr->test.data_size_in < ETH_HLEN)
|
||||
return -EINVAL;
|
||||
|
||||
data = bpf_test_init(kattr, kattr->test.data_size_in,
|
||||
size, NET_SKB_PAD + NET_IP_ALIGN,
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
|
||||
if (IS_ERR(data))
|
||||
return PTR_ERR(data);
|
||||
|
||||
ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
|
||||
if (IS_ERR(ctx)) {
|
||||
kfree(data);
|
||||
return PTR_ERR(ctx);
|
||||
}
|
||||
|
||||
switch (prog->type) {
|
||||
case BPF_PROG_TYPE_SCHED_CLS:
|
||||
case BPF_PROG_TYPE_SCHED_ACT:
|
||||
is_direct_pkt_access = true;
|
||||
is_l2 = true;
|
||||
fallthrough;
|
||||
break;
|
||||
case BPF_PROG_TYPE_LWT_IN:
|
||||
case BPF_PROG_TYPE_LWT_OUT:
|
||||
case BPF_PROG_TYPE_LWT_XMIT:
|
||||
is_lwt = true;
|
||||
fallthrough;
|
||||
case BPF_PROG_TYPE_CGROUP_SKB:
|
||||
is_direct_pkt_access = true;
|
||||
break;
|
||||
@@ -1020,25 +1024,88 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
|
||||
break;
|
||||
}
|
||||
|
||||
ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
|
||||
if (IS_ERR(ctx))
|
||||
return PTR_ERR(ctx);
|
||||
|
||||
if (ctx) {
|
||||
if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (ctx->data_end) {
|
||||
/* Non-linear LWT test_run is unsupported for now. */
|
||||
if (is_lwt) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
linear_sz = max(ETH_HLEN, ctx->data_end);
|
||||
}
|
||||
}
|
||||
|
||||
linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom);
|
||||
|
||||
data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom);
|
||||
if (IS_ERR(data)) {
|
||||
ret = PTR_ERR(data);
|
||||
data = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
|
||||
if (!sk) {
|
||||
kfree(data);
|
||||
kfree(ctx);
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
sock_init_data(NULL, sk);
|
||||
|
||||
skb = slab_build_skb(data);
|
||||
if (!skb) {
|
||||
kfree(data);
|
||||
kfree(ctx);
|
||||
sk_free(sk);
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
skb->sk = sk;
|
||||
|
||||
data = NULL; /* data released via kfree_skb */
|
||||
|
||||
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
|
||||
__skb_put(skb, size);
|
||||
__skb_put(skb, linear_sz);
|
||||
|
||||
if (unlikely(kattr->test.data_size_in > linear_sz)) {
|
||||
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
|
||||
struct skb_shared_info *sinfo = skb_shinfo(skb);
|
||||
u32 copied = linear_sz;
|
||||
|
||||
while (copied < kattr->test.data_size_in) {
|
||||
struct page *page;
|
||||
u32 data_len;
|
||||
|
||||
if (sinfo->nr_frags == MAX_SKB_FRAGS) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
page = alloc_page(GFP_KERNEL);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
data_len = min_t(u32, kattr->test.data_size_in - copied,
|
||||
PAGE_SIZE);
|
||||
skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len);
|
||||
|
||||
if (copy_from_user(page_address(page), data_in + copied,
|
||||
data_len)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
skb->data_len += data_len;
|
||||
skb->truesize += PAGE_SIZE;
|
||||
skb->len += data_len;
|
||||
copied += data_len;
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx && ctx->ifindex > 1) {
|
||||
dev = dev_get_by_index(net, ctx->ifindex);
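For reference, the frag-filling path above is reached when userspace passes more data than the requested linear area. A minimal userspace sketch, assuming libbpf's bpf_prog_test_run_opts() API; sizes and names are arbitrary and not taken from the selftests:

#include <bpf/bpf.h>
#include <linux/bpf.h>

static int run_nonlinear_skb(int prog_fd)
{
        /* cap the linear part; the rest of data_in should end up in frags */
        struct __sk_buff skb_ctx = { .data_end = 256 };
        static char payload[8192];
        LIBBPF_OPTS(bpf_test_run_opts, topts,
                .data_in = payload,
                .data_size_in = sizeof(payload),
                .ctx_in = &skb_ctx,
                .ctx_size_in = sizeof(skb_ctx),
        );

        /* LWT program types reject a non-zero data_end, per the check above */
        return bpf_prog_test_run_opts(prog_fd, &topts);
}
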
@@ -1118,12 +1185,11 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
|
||||
|
||||
convert_skb_to___skb(skb, ctx);
|
||||
|
||||
size = skb->len;
|
||||
/* bpf program can never convert linear skb to non-linear */
|
||||
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
|
||||
size = skb_headlen(skb);
|
||||
ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
|
||||
duration);
|
||||
if (skb_is_nonlinear(skb))
|
||||
/* bpf program can never convert linear skb to non-linear */
|
||||
WARN_ON_ONCE(linear_sz == kattr->test.data_size_in);
|
||||
ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len,
|
||||
skb->data_len, retval, duration);
|
||||
if (!ret)
|
||||
ret = bpf_ctx_finish(kattr, uattr, ctx,
|
||||
sizeof(struct __sk_buff));
|
||||
@@ -1131,7 +1197,9 @@ out:
|
||||
if (dev && dev != net->loopback_dev)
|
||||
dev_put(dev);
|
||||
kfree_skb(skb);
|
||||
sk_free(sk);
|
||||
kfree(data);
|
||||
if (sk)
|
||||
sk_free(sk);
|
||||
kfree(ctx);
|
||||
return ret;
|
||||
}
|
||||
@@ -1329,7 +1397,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
|
||||
goto out;
|
||||
|
||||
size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
|
||||
ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
|
||||
ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, sinfo->xdp_frags_size,
|
||||
retval, duration);
|
||||
if (!ret)
|
||||
ret = bpf_ctx_finish(kattr, uattr, ctx,
|
||||
@@ -1420,7 +1488,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
|
||||
goto out;
|
||||
|
||||
ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
|
||||
sizeof(flow_keys), retval, duration);
|
||||
sizeof(flow_keys), 0, retval, duration);
|
||||
if (!ret)
|
||||
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
|
||||
sizeof(struct bpf_flow_keys));
|
||||
@@ -1521,7 +1589,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
|
||||
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
|
||||
}
|
||||
|
||||
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
|
||||
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
|
||||
if (!ret)
|
||||
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
|
||||
|
||||
@@ -1721,7 +1789,7 @@ int bpf_prog_test_run_nf(struct bpf_prog *prog,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
|
||||
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
|
||||
|
||||
out:
|
||||
kfree(user_ctx);
|
||||
|
||||
@@ -50,16 +50,14 @@ void bpf_sk_storage_free(struct sock *sk)
|
||||
{
|
||||
struct bpf_local_storage *sk_storage;
|
||||
|
||||
migrate_disable();
|
||||
rcu_read_lock();
|
||||
rcu_read_lock_dont_migrate();
|
||||
sk_storage = rcu_dereference(sk->sk_bpf_storage);
|
||||
if (!sk_storage)
|
||||
goto out;
|
||||
|
||||
bpf_local_storage_destroy(sk_storage);
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
migrate_enable();
|
||||
rcu_read_unlock_migrate();
|
||||
}
|
||||
|
||||
static void bpf_sk_storage_map_free(struct bpf_map *map)
|
||||
@@ -138,7 +136,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
|
||||
{
|
||||
struct bpf_local_storage_elem *copy_selem;
|
||||
|
||||
copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, false, GFP_ATOMIC);
|
||||
copy_selem = bpf_selem_alloc(smap, newsk, NULL, false, GFP_ATOMIC);
|
||||
if (!copy_selem)
|
||||
return NULL;
|
||||
|
||||
@@ -161,8 +159,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
|
||||
|
||||
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
|
||||
|
||||
migrate_disable();
|
||||
rcu_read_lock();
|
||||
rcu_read_lock_dont_migrate();
|
||||
sk_storage = rcu_dereference(sk->sk_bpf_storage);
|
||||
|
||||
if (!sk_storage || hlist_empty(&sk_storage->list))
|
||||
@@ -199,7 +196,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
|
||||
} else {
|
||||
ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
|
||||
if (ret) {
|
||||
bpf_selem_free(copy_selem, smap, true);
|
||||
bpf_selem_free(copy_selem, true);
|
||||
atomic_sub(smap->elem_size,
|
||||
&newsk->sk_omem_alloc);
|
||||
bpf_map_put(map);
|
||||
@@ -213,8 +210,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
|
||||
}
|
||||
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
migrate_enable();
|
||||
rcu_read_unlock_migrate();
|
||||
|
||||
/* In case of an error, don't free anything explicitly here, the
|
||||
* caller is responsible to call bpf_sk_storage_free.
|
||||
|
||||
@@ -2458,6 +2458,13 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
|
||||
if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
|
||||
return -EINVAL;
|
||||
|
||||
/* BPF test infra's convert___skb_to_skb() can create type-less
|
||||
* GSO packets. gso_features_check() will detect this as a bad
|
||||
* offload. However, lets not leak them out in the first place.
|
||||
*/
|
||||
if (unlikely(skb_is_gso(skb) && !skb_shinfo(skb)->gso_type))
|
||||
return -EBADMSG;
|
||||
|
||||
dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
|
||||
if (unlikely(!dev))
|
||||
return -EINVAL;
|
||||
@@ -6422,9 +6429,12 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
|
||||
*/
|
||||
if (skb_is_gso(skb)) {
|
||||
ret = BPF_MTU_CHK_RET_SUCCESS;
|
||||
if (flags & BPF_MTU_CHK_SEGS &&
|
||||
!skb_gso_validate_network_len(skb, mtu))
|
||||
ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
|
||||
if (flags & BPF_MTU_CHK_SEGS) {
|
||||
if (!skb_transport_header_was_set(skb))
|
||||
return -EINVAL;
|
||||
if (!skb_gso_validate_network_len(skb, mtu))
|
||||
ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
|
||||
}
|
||||
}
|
||||
out:
|
||||
*mtu_len = mtu;
|
||||
|
||||
@@ -112,7 +112,7 @@ function start_hbm () {
|
||||
processArgs () {
|
||||
for i in $args ; do
|
||||
case $i in
|
||||
# Support for upcomming ingress rate limiting
|
||||
# Support for upcoming ingress rate limiting
|
||||
#in) # support for upcoming ingress rate limiting
|
||||
# dir="-i"
|
||||
# dir_name="in"
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
* License as published by the Free Software Foundation.
|
||||
*
|
||||
* Example program for Host Bandwidth Managment
|
||||
* Example program for Host Bandwidth Management
|
||||
*
|
||||
* This program loads a cgroup skb BPF program to enforce cgroup output
|
||||
* (egress) or input (ingress) bandwidth limits.
|
||||
@@ -24,7 +24,7 @@
|
||||
* beyond the rate limit specified while there is available
|
||||
* bandwidth. Current implementation assumes there is only
|
||||
* NIC (eth0), but can be extended to support multiple NICs.
|
||||
* Currrently only supported for egress.
|
||||
* Currently only supported for egress.
|
||||
* -h Print this info
|
||||
* prog BPF program file name. Name defaults to hbm_out_kern.o
|
||||
*/
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* License as published by the Free Software Foundation.
|
||||
*
|
||||
* BPF program to set congestion control to dctcp when both hosts are
|
||||
* in the same datacenter (as deteremined by IPv6 prefix).
|
||||
* in the same datacenter (as determined by IPv6 prefix).
|
||||
*
|
||||
* Use "bpftool cgroup attach $cg sock_ops $prog" to load this BPF program.
|
||||
*/
|
||||
|
||||
@@ -20,7 +20,7 @@ SEC("kprobe.multi/__netif_receive_skb_core*")
|
||||
int bpf_prog1(struct pt_regs *ctx)
|
||||
{
|
||||
/* attaches to kprobe __netif_receive_skb_core,
|
||||
* looks for packets on loobpack device and prints them
|
||||
* looks for packets on loopback device and prints them
|
||||
* (wildcard is used for avoiding symbol mismatch due to optimization)
|
||||
*/
|
||||
char devname[IFNAMSIZ];
|
||||
|
||||
@@ -32,7 +32,7 @@ FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled
|
||||
FEATURE_DISPLAY = libbfd
|
||||
|
||||
check_feat := 1
|
||||
NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean
|
||||
NON_CHECK_FEAT_TARGETS := clean bpftool_clean resolve_btfids_clean
|
||||
ifdef MAKECMDGOALS
|
||||
ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
|
||||
check_feat := 0
|
||||
@@ -70,7 +70,7 @@ $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
|
||||
|
||||
PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
|
||||
|
||||
all: $(PROGS) bpftool runqslower
|
||||
all: $(PROGS) bpftool
|
||||
|
||||
$(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
|
||||
$(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
|
||||
@@ -86,7 +86,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
|
||||
$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
|
||||
$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
|
||||
|
||||
clean: bpftool_clean runqslower_clean resolve_btfids_clean
|
||||
clean: bpftool_clean resolve_btfids_clean
|
||||
$(call QUIET_CLEAN, bpf-progs)
|
||||
$(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
|
||||
$(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
|
||||
@@ -112,12 +112,6 @@ bpftool_install:
|
||||
bpftool_clean:
|
||||
$(call descend,bpftool,clean)
|
||||
|
||||
runqslower:
|
||||
$(call descend,runqslower)
|
||||
|
||||
runqslower_clean:
|
||||
$(call descend,runqslower,clean)
|
||||
|
||||
resolve_btfids:
|
||||
$(call descend,resolve_btfids)
|
||||
|
||||
@@ -125,5 +119,4 @@ resolve_btfids_clean:
|
||||
$(call descend,resolve_btfids,clean)
|
||||
|
||||
.PHONY: all install clean bpftool bpftool_install bpftool_clean \
|
||||
runqslower runqslower_clean \
|
||||
resolve_btfids resolve_btfids_clean
|
||||
|
||||
@@ -55,7 +55,8 @@ MAP COMMANDS
|
||||
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
|
||||
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
|
||||
| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
|
||||
| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** }
|
||||
| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena**
|
||||
| | **insn_array** }
|
||||
|
||||
DESCRIPTION
|
||||
===========
|
||||
|
||||
@@ -590,7 +590,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
|
||||
case BTF_KIND_DATASEC:
|
||||
return btf_dumper_datasec(d, type_id, data);
|
||||
default:
|
||||
jsonw_printf(d->jw, "(unsupported-kind");
|
||||
jsonw_printf(d->jw, "(unsupported-kind)");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1477,7 +1477,8 @@ static int do_help(int argc, char **argv)
|
||||
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
|
||||
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
|
||||
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
|
||||
" task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena }\n"
|
||||
" task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena |\n"
|
||||
" insn_array }\n"
|
||||
" " HELP_SPEC_OPTIONS " |\n"
|
||||
" {-f|--bpffs} | {-n|--nomount} }\n"
|
||||
"",
|
||||
|
||||
@@ -28,6 +28,12 @@
|
||||
|
||||
#define OPEN_SSL_ERR_BUF_LEN 256
|
||||
|
||||
/* Use deprecated in 3.0 ERR_get_error_line_data for openssl < 3 */
|
||||
#if !defined(OPENSSL_VERSION_MAJOR) || (OPENSSL_VERSION_MAJOR < 3)
|
||||
#define ERR_get_error_all(file, line, func, data, flags) \
|
||||
ERR_get_error_line_data(file, line, data, flags)
|
||||
#endif
|
||||
|
||||
static void display_openssl_errors(int l)
|
||||
{
|
||||
char buf[OPEN_SSL_ERR_BUF_LEN];
|
||||
|
||||
tools/bpf/runqslower/.gitignore (vendored)
@@ -1,2 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
/.output
|
||||
@@ -1,91 +0,0 @@
|
||||
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
include ../../scripts/Makefile.include
|
||||
|
||||
OUTPUT ?= $(abspath .output)/
|
||||
|
||||
BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
|
||||
DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool
|
||||
BPFTOOL ?= $(DEFAULT_BPFTOOL)
|
||||
BPF_TARGET_ENDIAN ?= --target=bpf
|
||||
LIBBPF_SRC := $(abspath ../../lib/bpf)
|
||||
BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
|
||||
BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
|
||||
BPF_DESTDIR := $(BPFOBJ_OUTPUT)
|
||||
BPF_INCLUDE := $(BPF_DESTDIR)/include
|
||||
INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi)
|
||||
CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS)
|
||||
CFLAGS += $(EXTRA_CFLAGS)
|
||||
LDFLAGS += $(EXTRA_LDFLAGS)
|
||||
LDLIBS += -lelf -lz
|
||||
|
||||
# Try to detect best kernel BTF source
|
||||
KERNEL_REL := $(shell uname -r)
|
||||
VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
|
||||
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
|
||||
../../../vmlinux /sys/kernel/btf/vmlinux \
|
||||
/boot/vmlinux-$(KERNEL_REL)
|
||||
VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
|
||||
$(wildcard $(VMLINUX_BTF_PATHS))))
|
||||
|
||||
ifneq ($(V),1)
|
||||
MAKEFLAGS += --no-print-directory
|
||||
submake_extras := feature_display=0
|
||||
endif
|
||||
|
||||
.DELETE_ON_ERROR:
|
||||
|
||||
.PHONY: all clean runqslower libbpf_hdrs
|
||||
all: runqslower
|
||||
|
||||
runqslower: $(OUTPUT)/runqslower
|
||||
|
||||
clean:
|
||||
$(call QUIET_CLEAN, runqslower)
|
||||
$(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT)
|
||||
$(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d
|
||||
$(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
|
||||
$(Q)$(RM) $(OUTPUT)runqslower
|
||||
$(Q)$(RM) -r .output
|
||||
|
||||
libbpf_hdrs: $(BPFOBJ)
|
||||
|
||||
$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
|
||||
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
|
||||
|
||||
$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \
|
||||
$(OUTPUT)/runqslower.bpf.o | libbpf_hdrs
|
||||
|
||||
$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h | libbpf_hdrs
|
||||
|
||||
$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
|
||||
$(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
|
||||
|
||||
$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
|
||||
$(QUIET_GEN)$(CLANG) -g -O2 $(BPF_TARGET_ENDIAN) $(INCLUDES) \
|
||||
-c $(filter %.c,$^) -o $@ && \
|
||||
$(LLVM_STRIP) -g $@
|
||||
|
||||
$(OUTPUT)/%.o: %.c | $(OUTPUT)
|
||||
$(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
|
||||
|
||||
$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
|
||||
$(QUIET_MKDIR)mkdir -p $@
|
||||
|
||||
$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
|
||||
ifeq ($(VMLINUX_H),)
|
||||
$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
|
||||
echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \
|
||||
"specify its location." >&2; \
|
||||
exit 1;\
|
||||
fi
|
||||
$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
|
||||
else
|
||||
$(Q)cp "$(VMLINUX_H)" $@
|
||||
endif
|
||||
|
||||
$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
|
||||
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \
|
||||
DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers
|
||||
|
||||
$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
|
||||
$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) bootstrap
|
||||
@@ -1,106 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (c) 2019 Facebook
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include "runqslower.h"
|
||||
|
||||
#define TASK_RUNNING 0
|
||||
#define BPF_F_CURRENT_CPU 0xffffffffULL
|
||||
|
||||
const volatile __u64 min_us = 0;
|
||||
const volatile pid_t targ_pid = 0;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
__type(key, int);
|
||||
__type(value, u64);
|
||||
} start SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
|
||||
__uint(key_size, sizeof(u32));
|
||||
__uint(value_size, sizeof(u32));
|
||||
} events SEC(".maps");
|
||||
|
||||
/* record enqueue timestamp */
|
||||
__always_inline
|
||||
static int trace_enqueue(struct task_struct *t)
|
||||
{
|
||||
u32 pid = t->pid;
|
||||
u64 *ptr;
|
||||
|
||||
if (!pid || (targ_pid && targ_pid != pid))
|
||||
return 0;
|
||||
|
||||
ptr = bpf_task_storage_get(&start, t, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (!ptr)
|
||||
return 0;
|
||||
|
||||
*ptr = bpf_ktime_get_ns();
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/sched_wakeup")
|
||||
int handle__sched_wakeup(u64 *ctx)
|
||||
{
|
||||
/* TP_PROTO(struct task_struct *p) */
|
||||
struct task_struct *p = (void *)ctx[0];
|
||||
|
||||
return trace_enqueue(p);
|
||||
}
|
||||
|
||||
SEC("tp_btf/sched_wakeup_new")
|
||||
int handle__sched_wakeup_new(u64 *ctx)
|
||||
{
|
||||
/* TP_PROTO(struct task_struct *p) */
|
||||
struct task_struct *p = (void *)ctx[0];
|
||||
|
||||
return trace_enqueue(p);
|
||||
}
|
||||
|
||||
SEC("tp_btf/sched_switch")
|
||||
int handle__sched_switch(u64 *ctx)
|
||||
{
|
||||
/* TP_PROTO(bool preempt, struct task_struct *prev,
|
||||
* struct task_struct *next)
|
||||
*/
|
||||
struct task_struct *prev = (struct task_struct *)ctx[1];
|
||||
struct task_struct *next = (struct task_struct *)ctx[2];
|
||||
struct runq_event event = {};
|
||||
u64 *tsp, delta_us;
|
||||
u32 pid;
|
||||
|
||||
/* ivcsw: treat like an enqueue event and store timestamp */
|
||||
if (prev->__state == TASK_RUNNING)
|
||||
trace_enqueue(prev);
|
||||
|
||||
pid = next->pid;
|
||||
|
||||
/* For pid mismatch, save a bpf_task_storage_get */
|
||||
if (!pid || (targ_pid && targ_pid != pid))
|
||||
return 0;
|
||||
|
||||
/* fetch timestamp and calculate delta */
|
||||
tsp = bpf_task_storage_get(&start, next, 0, 0);
|
||||
if (!tsp)
|
||||
return 0; /* missed enqueue */
|
||||
|
||||
delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
|
||||
if (min_us && delta_us <= min_us)
|
||||
return 0;
|
||||
|
||||
event.pid = pid;
|
||||
event.delta_us = delta_us;
|
||||
bpf_get_current_comm(&event.task, sizeof(event.task));
|
||||
|
||||
/* output */
|
||||
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
|
||||
&event, sizeof(event));
|
||||
|
||||
bpf_task_storage_delete(&start, next);
|
||||
return 0;
|
||||
}
|
||||
|
||||
char LICENSE[] SEC("license") = "GPL";
|
||||
@@ -1,171 +0,0 @@
|
||||
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
// Copyright (c) 2019 Facebook
|
||||
#include <argp.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#include <bpf/bpf.h>
|
||||
#include "runqslower.h"
|
||||
#include "runqslower.skel.h"
|
||||
|
||||
struct env {
|
||||
pid_t pid;
|
||||
__u64 min_us;
|
||||
bool verbose;
|
||||
} env = {
|
||||
.min_us = 10000,
|
||||
};
|
||||
|
||||
const char *argp_program_version = "runqslower 0.1";
|
||||
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
|
||||
const char argp_program_doc[] =
|
||||
"runqslower Trace long process scheduling delays.\n"
|
||||
" For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n"
|
||||
"\n"
|
||||
"This script traces high scheduling delays between tasks being\n"
|
||||
"ready to run and them running on CPU after that.\n"
|
||||
"\n"
|
||||
"USAGE: runqslower [-p PID] [min_us]\n"
|
||||
"\n"
|
||||
"EXAMPLES:\n"
|
||||
" runqslower # trace run queue latency higher than 10000 us (default)\n"
|
||||
" runqslower 1000 # trace run queue latency higher than 1000 us\n"
|
||||
" runqslower -p 123 # trace pid 123 only\n";
|
||||
|
||||
static const struct argp_option opts[] = {
|
||||
{ "pid", 'p', "PID", 0, "Process PID to trace"},
|
||||
{ "verbose", 'v', NULL, 0, "Verbose debug output" },
|
||||
{},
|
||||
};
|
||||
|
||||
static error_t parse_arg(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
static int pos_args;
|
||||
int pid;
|
||||
long long min_us;
|
||||
|
||||
switch (key) {
|
||||
case 'v':
|
||||
env.verbose = true;
|
||||
break;
|
||||
case 'p':
|
||||
errno = 0;
|
||||
pid = strtol(arg, NULL, 10);
|
||||
if (errno || pid <= 0) {
|
||||
fprintf(stderr, "Invalid PID: %s\n", arg);
|
||||
argp_usage(state);
|
||||
}
|
||||
env.pid = pid;
|
||||
break;
|
||||
case ARGP_KEY_ARG:
|
||||
if (pos_args++) {
|
||||
fprintf(stderr,
|
||||
"Unrecognized positional argument: %s\n", arg);
|
||||
argp_usage(state);
|
||||
}
|
||||
errno = 0;
|
||||
min_us = strtoll(arg, NULL, 10);
|
||||
if (errno || min_us <= 0) {
|
||||
fprintf(stderr, "Invalid delay (in us): %s\n", arg);
|
||||
argp_usage(state);
|
||||
}
|
||||
env.min_us = min_us;
|
||||
break;
|
||||
default:
|
||||
return ARGP_ERR_UNKNOWN;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int libbpf_print_fn(enum libbpf_print_level level,
|
||||
const char *format, va_list args)
|
||||
{
|
||||
if (level == LIBBPF_DEBUG && !env.verbose)
|
||||
return 0;
|
||||
return vfprintf(stderr, format, args);
|
||||
}
|
||||
|
||||
void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
|
||||
{
|
||||
const struct runq_event *e = data;
|
||||
struct tm *tm;
|
||||
char ts[32];
|
||||
time_t t;
|
||||
|
||||
time(&t);
|
||||
tm = localtime(&t);
|
||||
strftime(ts, sizeof(ts), "%H:%M:%S", tm);
|
||||
printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us);
|
||||
}
|
||||
|
||||
void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
|
||||
{
|
||||
printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
static const struct argp argp = {
|
||||
.options = opts,
|
||||
.parser = parse_arg,
|
||||
.doc = argp_program_doc,
|
||||
};
|
||||
struct perf_buffer *pb = NULL;
|
||||
struct runqslower_bpf *obj;
|
||||
int err;
|
||||
|
||||
err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
libbpf_set_print(libbpf_print_fn);
|
||||
|
||||
/* Use libbpf 1.0 API mode */
|
||||
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
|
||||
|
||||
obj = runqslower_bpf__open();
|
||||
if (!obj) {
|
||||
fprintf(stderr, "failed to open and/or load BPF object\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* initialize global data (filtering options) */
|
||||
obj->rodata->targ_pid = env.pid;
|
||||
obj->rodata->min_us = env.min_us;
|
||||
|
||||
err = runqslower_bpf__load(obj);
|
||||
if (err) {
|
||||
fprintf(stderr, "failed to load BPF object: %d\n", err);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
err = runqslower_bpf__attach(obj);
|
||||
if (err) {
|
||||
fprintf(stderr, "failed to attach BPF programs\n");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
printf("Tracing run queue latency higher than %llu us\n", env.min_us);
|
||||
printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)");
|
||||
|
||||
pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64,
|
||||
handle_event, handle_lost_events, NULL, NULL);
|
||||
err = libbpf_get_error(pb);
|
||||
if (err) {
|
||||
pb = NULL;
|
||||
fprintf(stderr, "failed to open perf buffer: %d\n", err);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
while ((err = perf_buffer__poll(pb, 100)) >= 0)
|
||||
;
|
||||
printf("Error polling perf buffer: %d\n", err);
|
||||
|
||||
cleanup:
|
||||
perf_buffer__free(pb);
|
||||
runqslower_bpf__destroy(obj);
|
||||
|
||||
return err != 0;
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
|
||||
#ifndef __RUNQSLOWER_H
|
||||
#define __RUNQSLOWER_H
|
||||
|
||||
#define TASK_COMM_LEN 16
|
||||
|
||||
struct runq_event {
|
||||
char task[TASK_COMM_LEN];
|
||||
__u64 delta_us;
|
||||
pid_t pid;
|
||||
};
|
||||
|
||||
#endif /* __RUNQSLOWER_H */
|
||||
@@ -1026,6 +1026,7 @@ enum bpf_map_type {
|
||||
BPF_MAP_TYPE_USER_RINGBUF,
|
||||
BPF_MAP_TYPE_CGRP_STORAGE,
|
||||
BPF_MAP_TYPE_ARENA,
|
||||
BPF_MAP_TYPE_INSN_ARRAY,
|
||||
__MAX_BPF_MAP_TYPE
|
||||
};
|
||||
|
||||
@@ -1430,6 +1431,9 @@ enum {
|
||||
|
||||
/* Do not translate kernel bpf_arena pointers to user pointers */
|
||||
BPF_F_NO_USER_CONV = (1U << 18),
|
||||
|
||||
/* Enable BPF ringbuf overwrite mode */
|
||||
BPF_F_RB_OVERWRITE = (1U << 19),
|
||||
};
|
||||
|
||||
/* Flags for BPF_PROG_QUERY. */
|
||||
@@ -5618,7 +5622,7 @@ union bpf_attr {
|
||||
* Return
|
||||
* *sk* if casting is valid, or **NULL** otherwise.
|
||||
*
|
||||
* long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
|
||||
* long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
|
||||
* Description
|
||||
* Get a dynptr to local memory *data*.
|
||||
*
|
||||
@@ -5661,7 +5665,7 @@ union bpf_attr {
|
||||
* Return
|
||||
* Nothing. Always succeeds.
|
||||
*
|
||||
* long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
|
||||
* long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
|
||||
* Description
|
||||
* Read *len* bytes from *src* into *dst*, starting from *offset*
|
||||
* into *src*.
|
||||
@@ -5671,7 +5675,7 @@ union bpf_attr {
|
||||
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
|
||||
* *flags* is not 0.
|
||||
*
|
||||
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
|
||||
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
|
||||
* Description
|
||||
* Write *len* bytes from *src* into *dst*, starting from *offset*
|
||||
* into *dst*.
|
||||
@@ -5692,7 +5696,7 @@ union bpf_attr {
|
||||
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
|
||||
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
|
||||
*
|
||||
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
|
||||
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
|
||||
* Description
|
||||
* Get a pointer to the underlying dynptr data.
|
||||
*
|
||||
@@ -6231,6 +6235,7 @@ enum {
        BPF_RB_RING_SIZE = 1,
        BPF_RB_CONS_POS = 2,
        BPF_RB_PROD_POS = 3,
        BPF_RB_OVERWRITE_POS = 4,
};
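BPF_RB_OVERWRITE_POS complements the BPF_F_RB_OVERWRITE map flag added earlier in this header. A BPF-side sketch, assuming the usual libbpf map-definition macros and that bpf_ringbuf_query() accepts the new selector; this is illustrative, not code from the series:

struct {
        __uint(type, BPF_MAP_TYPE_RINGBUF);
        __uint(max_entries, 256 * 1024);
        __uint(map_flags, BPF_F_RB_OVERWRITE);  /* overwrite oldest data when full */
} events SEC(".maps");

SEC("tp_btf/sched_switch")
int report_overwrite_pos(void *ctx)
{
        __u64 pos = bpf_ringbuf_query(&events, BPF_RB_OVERWRITE_POS);

        bpf_printk("overwrite position: %llu", pos);
        return 0;
}
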
/* BPF ring buffer constants */
|
||||
@@ -7645,4 +7650,24 @@ enum bpf_kfunc_flags {
        BPF_F_PAD_ZEROS = (1ULL << 0),
};

/*
 * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type.
 *
 * Before the map is used the orig_off field should point to an
 * instruction inside the program being loaded. The other fields
 * must be set to 0.
 *
 * After the program is loaded, the xlated_off will be adjusted
 * by the verifier to point to the index of the original instruction
 * in the xlated program. If the instruction is deleted, it will
 * be set to (u32)-1. The jitted_off will be set to the corresponding
 * offset in the jitted image of the program.
 */
struct bpf_insn_array_value {
        __u32 orig_off;
        __u32 xlated_off;
        __u32 jitted_off;
        __u32 :32;
};

#endif /* _UAPI__LINUX_BPF_H__ */
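A userspace sketch of how this type is meant to be used, mirroring the libbpf jump-table handling further down in this commit; the helper name and the fd_array wiring are hypothetical:

#include <unistd.h>
#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Create an insn array whose entries point at instruction offsets of the
 * program about to be loaded; the map is frozen before load and its fd is
 * then referenced by the program (e.g. via fd_array) for indirect jumps. */
static int make_insn_array(const __u32 *insn_offs, __u32 cnt)
{
        struct bpf_insn_array_value val = {};
        int map_fd;
        __u32 i;

        map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, "jt",
                                sizeof(__u32), sizeof(val), cnt, NULL);
        if (map_fd < 0)
                return map_fd;

        for (i = 0; i < cnt; i++) {
                val.orig_off = insn_offs[i];
                if (bpf_map_update_elem(map_fd, &i, &val, 0))
                        goto err;
        }
        if (bpf_map_freeze(map_fd))
                goto err;
        return map_fd;
err:
        close(map_fd);
        return -1;
}
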
@@ -154,7 +154,7 @@ int bump_rlimit_memlock(void)
|
||||
|
||||
memlock_bumped = true;
|
||||
|
||||
/* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */
|
||||
/* zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */
|
||||
if (memlock_rlim == 0)
|
||||
return 0;
|
||||
|
||||
|
||||
@@ -1061,7 +1061,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
|
||||
if (base_btf) {
|
||||
btf->base_btf = base_btf;
|
||||
btf->start_id = btf__type_cnt(base_btf);
|
||||
btf->start_str_off = base_btf->hdr->str_len;
|
||||
btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
|
||||
}
|
||||
|
||||
if (is_mmap) {
|
||||
@@ -3901,6 +3901,20 @@ err_out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate type signature hash of TYPEDEF, ignoring referenced type IDs,
|
||||
* as referenced type IDs equivalence is established separately during type
|
||||
* graph equivalence check algorithm.
|
||||
*/
|
||||
static long btf_hash_typedef(struct btf_type *t)
|
||||
{
|
||||
long h;
|
||||
|
||||
h = hash_combine(0, t->name_off);
|
||||
h = hash_combine(h, t->info);
|
||||
return h;
|
||||
}
|
||||
|
||||
static long btf_hash_common(struct btf_type *t)
|
||||
{
|
||||
long h;
|
||||
@@ -3918,6 +3932,13 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
|
||||
t1->size == t2->size;
|
||||
}
|
||||
|
||||
/* Check structural compatibility of two TYPEDEF. */
|
||||
static bool btf_equal_typedef(struct btf_type *t1, struct btf_type *t2)
|
||||
{
|
||||
return t1->name_off == t2->name_off &&
|
||||
t1->info == t2->info;
|
||||
}
|
||||
|
||||
/* Calculate type signature hash of INT or TAG. */
|
||||
static long btf_hash_int_decl_tag(struct btf_type *t)
|
||||
{
|
||||
@@ -4844,13 +4865,30 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
|
||||
}
|
||||
}
|
||||
|
||||
static inline long btf_hash_by_kind(struct btf_type *t, __u16 kind)
|
||||
{
|
||||
if (kind == BTF_KIND_TYPEDEF)
|
||||
return btf_hash_typedef(t);
|
||||
else
|
||||
return btf_hash_struct(t);
|
||||
}
|
||||
|
||||
static inline bool btf_equal_by_kind(struct btf_type *t1, struct btf_type *t2, __u16 kind)
|
||||
{
|
||||
if (kind == BTF_KIND_TYPEDEF)
|
||||
return btf_equal_typedef(t1, t2);
|
||||
else
|
||||
return btf_shallow_equal_struct(t1, t2);
|
||||
}
|
||||
|
||||
/*
|
||||
* Deduplicate struct/union types.
|
||||
* Deduplicate struct/union and typedef types.
|
||||
*
|
||||
* For each struct/union type its type signature hash is calculated, taking
|
||||
* into account type's name, size, number, order and names of fields, but
|
||||
* ignoring type ID's referenced from fields, because they might not be deduped
|
||||
* completely until after reference types deduplication phase. This type hash
|
||||
* completely until after reference types deduplication phase. For each typedef
|
||||
* type, the hash is computed based on the type’s name and size. This type hash
|
||||
* is used to iterate over all potential canonical types, sharing same hash.
|
||||
* For each canonical candidate we check whether type graphs that they form
|
||||
* (through referenced types in fields and so on) are equivalent using algorithm
|
||||
@@ -4882,18 +4920,20 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
|
||||
t = btf_type_by_id(d->btf, type_id);
|
||||
kind = btf_kind(t);
|
||||
|
||||
if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
|
||||
if (kind != BTF_KIND_STRUCT &&
|
||||
kind != BTF_KIND_UNION &&
|
||||
kind != BTF_KIND_TYPEDEF)
|
||||
return 0;
|
||||
|
||||
h = btf_hash_struct(t);
|
||||
h = btf_hash_by_kind(t, kind);
|
||||
for_each_dedup_cand(d, hash_entry, h) {
|
||||
__u32 cand_id = hash_entry->value;
|
||||
int eq;
|
||||
|
||||
/*
|
||||
* Even though btf_dedup_is_equiv() checks for
|
||||
* btf_shallow_equal_struct() internally when checking two
|
||||
* structs (unions) for equivalence, we need to guard here
|
||||
* btf_equal_by_kind() internally when checking two
|
||||
* structs (unions) or typedefs for equivalence, we need to guard here
|
||||
* from picking matching FWD type as a dedup candidate.
|
||||
* This can happen due to hash collision. In such case just
|
||||
* relying on btf_dedup_is_equiv() would lead to potentially
|
||||
@@ -4901,7 +4941,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
|
||||
* FWD and compatible STRUCT/UNION are considered equivalent.
|
||||
*/
|
||||
cand_type = btf_type_by_id(d->btf, cand_id);
|
||||
if (!btf_shallow_equal_struct(t, cand_type))
|
||||
if (!btf_equal_by_kind(t, cand_type, kind))
|
||||
continue;
|
||||
|
||||
btf_dedup_clear_hypot_map(d);
|
||||
@@ -4939,18 +4979,18 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
|
||||
/*
|
||||
* Deduplicate reference type.
|
||||
*
|
||||
* Once all primitive and struct/union types got deduplicated, we can easily
|
||||
* Once all primitive, struct/union and typedef types got deduplicated, we can easily
|
||||
* deduplicate all other (reference) BTF types. This is done in two steps:
|
||||
*
|
||||
* 1. Resolve all referenced type IDs into their canonical type IDs. This
|
||||
* resolution can be done either immediately for primitive or struct/union types
|
||||
* (because they were deduped in previous two phases) or recursively for
|
||||
* resolution can be done either immediately for primitive, struct/union, and typedef
|
||||
* types (because they were deduped in previous two phases) or recursively for
|
||||
* reference types. Recursion will always terminate at either primitive or
|
||||
* struct/union type, at which point we can "unwind" chain of reference types
|
||||
* one by one. There is no danger of encountering cycles because in C type
|
||||
* system the only way to form type cycle is through struct/union, so any chain
|
||||
* of reference types, even those taking part in a type cycle, will inevitably
|
||||
* reach struct/union at some point.
|
||||
* struct/union and typedef types, at which point we can "unwind" chain of reference
|
||||
* types one by one. There is no danger of encountering cycles in C, as the only way to
|
||||
* form a type cycle is through struct or union types. Go can form such cycles through
|
||||
* typedef. Thus, any chain of reference types, even those taking part in a type cycle,
|
||||
* will inevitably reach a struct/union or typedef type at some point.
|
||||
*
|
||||
* 2. Once all referenced type IDs are resolved into canonical ones, BTF type
|
||||
* becomes "stable", in the sense that no further deduplication will cause
|
||||
@@ -4982,7 +5022,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
|
||||
case BTF_KIND_VOLATILE:
|
||||
case BTF_KIND_RESTRICT:
|
||||
case BTF_KIND_PTR:
|
||||
case BTF_KIND_TYPEDEF:
|
||||
case BTF_KIND_FUNC:
|
||||
case BTF_KIND_TYPE_TAG:
|
||||
ref_type_id = btf_dedup_ref_type(d, t->type);
|
||||
@@ -5818,7 +5857,7 @@ void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
|
||||
{
|
||||
btf->base_btf = (struct btf *)base_btf;
|
||||
btf->start_id = btf__type_cnt(base_btf);
|
||||
btf->start_str_off = base_btf->hdr->str_len;
|
||||
btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
|
||||
}
|
||||
|
||||
int btf__relocate(struct btf *btf, const struct btf *base_btf)
|
||||
|
||||
@@ -94,6 +94,7 @@ LIBBPF_API struct btf *btf__new_empty(void);
|
||||
* @brief **btf__new_empty_split()** creates an unpopulated BTF object from an
|
||||
* ELF BTF section except with a base BTF on top of which split BTF should be
|
||||
* based
|
||||
* @param base_btf base BTF object
|
||||
* @return new BTF object instance which has to be eventually freed with
|
||||
* **btf__free()**
|
||||
*
|
||||
@@ -115,6 +116,10 @@ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
|
||||
* When that split BTF is loaded against a (possibly changed) base, this
|
||||
* distilled base BTF will help update references to that (possibly changed)
|
||||
* base BTF.
|
||||
* @param src_btf source split BTF object
|
||||
* @param new_base_btf pointer to where the new base BTF object pointer will be stored
|
||||
* @param new_split_btf pointer to where the new split BTF object pointer will be stored
|
||||
* @return 0 on success; negative error code, otherwise
|
||||
*
|
||||
* Both the new split and its associated new base BTF must be freed by
|
||||
* the caller.
|
||||
@@ -264,6 +269,9 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
|
||||
* to base BTF kinds, and verify those references are compatible with
|
||||
* *base_btf*; if they are, *btf* is adjusted such that is re-parented to
|
||||
* *base_btf* and type ids and strings are adjusted to accommodate this.
|
||||
* @param btf split BTF object to relocate
|
||||
* @param base_btf base BTF object
|
||||
* @return 0 on success; negative error code, otherwise
|
||||
*
|
||||
* If successful, 0 is returned and **btf** now has **base_btf** as its
|
||||
* base.
|
||||
|
||||
@@ -190,6 +190,7 @@ static const char * const map_type_name[] = {
|
||||
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
|
||||
[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
|
||||
[BPF_MAP_TYPE_ARENA] = "arena",
|
||||
[BPF_MAP_TYPE_INSN_ARRAY] = "insn_array",
|
||||
};
|
||||
|
||||
static const char * const prog_type_name[] = {
|
||||
@@ -369,6 +370,7 @@ enum reloc_type {
|
||||
RELO_EXTERN_CALL,
|
||||
RELO_SUBPROG_ADDR,
|
||||
RELO_CORE,
|
||||
RELO_INSN_ARRAY,
|
||||
};
|
||||
|
||||
struct reloc_desc {
|
||||
@@ -379,7 +381,16 @@ struct reloc_desc {
|
||||
struct {
|
||||
int map_idx;
|
||||
int sym_off;
|
||||
int ext_idx;
|
||||
/*
|
||||
* The following two fields can be unionized, as the
|
||||
* ext_idx field is used for extern symbols, and the
|
||||
* sym_size is used for jump tables, which are never
|
||||
* extern
|
||||
*/
|
||||
union {
|
||||
int ext_idx;
|
||||
int sym_size;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -421,6 +432,11 @@ struct bpf_sec_def {
|
||||
libbpf_prog_attach_fn_t prog_attach_fn;
|
||||
};
|
||||
|
||||
struct bpf_light_subprog {
|
||||
__u32 sec_insn_off;
|
||||
__u32 sub_insn_off;
|
||||
};
|
||||
|
||||
/*
|
||||
* bpf_prog should be a better name but it has been used in
|
||||
* linux/filter.h.
|
||||
@@ -494,6 +510,9 @@ struct bpf_program {
|
||||
__u32 line_info_cnt;
|
||||
__u32 prog_flags;
|
||||
__u8 hash[SHA256_DIGEST_LENGTH];
|
||||
|
||||
struct bpf_light_subprog *subprogs;
|
||||
__u32 subprog_cnt;
|
||||
};
|
||||
|
||||
struct bpf_struct_ops {
|
||||
@@ -667,6 +686,7 @@ struct elf_state {
|
||||
int symbols_shndx;
|
||||
bool has_st_ops;
|
||||
int arena_data_shndx;
|
||||
int jumptables_data_shndx;
|
||||
};
|
||||
|
||||
struct usdt_manager;
|
||||
@@ -738,6 +758,16 @@ struct bpf_object {
|
||||
void *arena_data;
|
||||
size_t arena_data_sz;
|
||||
|
||||
void *jumptables_data;
|
||||
size_t jumptables_data_sz;
|
||||
|
||||
struct {
|
||||
struct bpf_program *prog;
|
||||
int sym_off;
|
||||
int fd;
|
||||
} *jumptable_maps;
|
||||
size_t jumptable_map_cnt;
|
||||
|
||||
struct kern_feature_cache *feat_cache;
|
||||
char *token_path;
|
||||
int token_fd;
|
||||
@@ -764,6 +794,7 @@ void bpf_program__unload(struct bpf_program *prog)
|
||||
|
||||
zfree(&prog->func_info);
|
||||
zfree(&prog->line_info);
|
||||
zfree(&prog->subprogs);
|
||||
}
|
||||
|
||||
static void bpf_program__exit(struct bpf_program *prog)
|
||||
@@ -2996,7 +3027,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
|
||||
|
||||
scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
|
||||
data = elf_sec_data(obj, scn);
|
||||
if (!scn || !data) {
|
||||
if (!data) {
|
||||
pr_warn("elf: failed to get %s map definitions for %s\n",
|
||||
MAPS_ELF_SEC, obj->path);
|
||||
return -EINVAL;
|
||||
@@ -3942,6 +3973,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
|
||||
} else if (strcmp(name, ARENA_SEC) == 0) {
|
||||
obj->efile.arena_data = data;
|
||||
obj->efile.arena_data_shndx = idx;
|
||||
} else if (strcmp(name, JUMPTABLES_SEC) == 0) {
|
||||
obj->jumptables_data = malloc(data->d_size);
|
||||
if (!obj->jumptables_data)
|
||||
return -ENOMEM;
|
||||
memcpy(obj->jumptables_data, data->d_buf, data->d_size);
|
||||
obj->jumptables_data_sz = data->d_size;
|
||||
obj->efile.jumptables_data_shndx = idx;
|
||||
} else {
|
||||
pr_info("elf: skipping unrecognized data section(%d) %s\n",
|
||||
idx, name);
|
||||
@@ -4634,6 +4672,16 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* jump table data relocation */
|
||||
if (shdr_idx == obj->efile.jumptables_data_shndx) {
|
||||
reloc_desc->type = RELO_INSN_ARRAY;
|
||||
reloc_desc->insn_idx = insn_idx;
|
||||
reloc_desc->map_idx = -1;
|
||||
reloc_desc->sym_off = sym->st_value;
|
||||
reloc_desc->sym_size = sym->st_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* generic map reference relocation */
|
||||
if (type == LIBBPF_MAP_UNSPEC) {
|
||||
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
|
||||
@@ -6144,6 +6192,157 @@ static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
|
||||
insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
|
||||
}
|
||||
|
||||
static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < obj->jumptable_map_cnt; i++) {
|
||||
/*
|
||||
* This might happen that same offset is used for two different
|
||||
* programs (as jump tables can be the same). However, for
|
||||
* different programs different maps should be created.
|
||||
*/
|
||||
if (obj->jumptable_maps[i].sym_off == sym_off &&
|
||||
obj->jumptable_maps[i].prog == prog)
|
||||
return obj->jumptable_maps[i].fd;
|
||||
}
|
||||
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd)
|
||||
{
|
||||
size_t cnt = obj->jumptable_map_cnt;
|
||||
size_t size = sizeof(obj->jumptable_maps[0]);
|
||||
void *tmp;
|
||||
|
||||
tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size);
|
||||
if (!tmp)
|
||||
return -ENOMEM;
|
||||
|
||||
obj->jumptable_maps = tmp;
|
||||
obj->jumptable_maps[cnt].prog = prog;
|
||||
obj->jumptable_maps[cnt].sym_off = sym_off;
|
||||
obj->jumptable_maps[cnt].fd = map_fd;
|
||||
obj->jumptable_map_cnt++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int find_subprog_idx(struct bpf_program *prog, int insn_idx)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = prog->subprog_cnt - 1; i >= 0; i--) {
|
||||
if (insn_idx >= prog->subprogs[i].sub_insn_off)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo)
|
||||
{
|
||||
const __u32 jt_entry_size = 8;
|
||||
int sym_off = relo->sym_off;
|
||||
int jt_size = relo->sym_size;
|
||||
__u32 max_entries = jt_size / jt_entry_size;
|
||||
__u32 value_size = sizeof(struct bpf_insn_array_value);
|
||||
struct bpf_insn_array_value val = {};
|
||||
int subprog_idx;
|
||||
int map_fd, err;
|
||||
__u64 insn_off;
|
||||
__u64 *jt;
|
||||
__u32 i;
|
||||
|
||||
map_fd = find_jt_map(obj, prog, sym_off);
|
||||
if (map_fd >= 0)
|
||||
return map_fd;
|
||||
|
||||
if (sym_off % jt_entry_size) {
|
||||
pr_warn("map '.jumptables': jumptable start %d should be multiple of %u\n",
|
||||
sym_off, jt_entry_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (jt_size % jt_entry_size) {
|
||||
pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n",
|
||||
jt_size, jt_entry_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables",
|
||||
4, value_size, max_entries, NULL);
|
||||
if (map_fd < 0)
|
||||
return map_fd;
|
||||
|
||||
if (!obj->jumptables_data) {
|
||||
pr_warn("map '.jumptables': ELF file is missing jump table data\n");
|
||||
err = -EINVAL;
|
||||
goto err_close;
|
||||
}
|
||||
if (sym_off + jt_size > obj->jumptables_data_sz) {
|
||||
pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n",
|
||||
obj->jumptables_data_sz, sym_off + jt_size);
|
||||
err = -EINVAL;
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
subprog_idx = -1; /* main program */
|
||||
if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) {
|
||||
pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx);
|
||||
err = -EINVAL;
|
||||
goto err_close;
|
||||
}
|
||||
if (prog->subprogs)
|
||||
subprog_idx = find_subprog_idx(prog, relo->insn_idx);
|
||||
|
||||
jt = (__u64 *)(obj->jumptables_data + sym_off);
|
||||
for (i = 0; i < max_entries; i++) {
|
||||
/*
|
||||
* The offset must be made relative to the beginning of
|
||||
* the main function, not the subfunction.
|
||||
*/
|
||||
insn_off = jt[i]/sizeof(struct bpf_insn);
|
||||
if (subprog_idx >= 0) {
|
||||
insn_off -= prog->subprogs[subprog_idx].sec_insn_off;
|
||||
insn_off += prog->subprogs[subprog_idx].sub_insn_off;
|
||||
} else {
|
||||
insn_off -= prog->sec_insn_off;
|
||||
}
|
||||
|
||||
/*
|
||||
* LLVM-generated jump tables contain u64 records; however, the
* values must fit in u32.
|
||||
*/
|
||||
if (insn_off > UINT32_MAX) {
|
||||
pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %d\n",
|
||||
(long long)jt[i], sym_off + i * jt_entry_size);
|
||||
err = -EINVAL;
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
val.orig_off = insn_off;
|
||||
err = bpf_map_update_elem(map_fd, &i, &val, 0);
|
||||
if (err)
|
||||
goto err_close;
|
||||
}
|
||||
|
||||
err = bpf_map_freeze(map_fd);
|
||||
if (err)
|
||||
goto err_close;
|
||||
|
||||
err = add_jt_map(obj, prog, sym_off, map_fd);
|
||||
if (err)
|
||||
goto err_close;
|
||||
|
||||
return map_fd;
|
||||
|
||||
err_close:
|
||||
close(map_fd);
|
||||
return err;
|
||||
}
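For reference, here is a minimal user-space sketch of the same sequence create_jt_map() performs above: create a BPF_MAP_TYPE_INSN_ARRAY map, fill each slot with an instruction offset, and freeze the map before program load. This is illustrative only and not part of the patch; the helper name and the offsets passed in are hypothetical, and struct bpf_insn_array_value / BPF_MAP_TYPE_INSN_ARRAY are assumed to come from the updated uapi headers in this series.

/* Illustrative sketch only (not part of the patch). The insn_offs values
 * are hypothetical; in libbpf they come from the .jumptables ELF section,
 * as shown in create_jt_map() above.
 */
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int make_insn_array_map(const __u32 *insn_offs, __u32 n)
{
	struct bpf_insn_array_value val = {};
	int map_fd, err;
	__u32 i;

	map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, "jt",
				sizeof(__u32), sizeof(val), n, NULL);
	if (map_fd < 0)
		return map_fd;

	for (i = 0; i < n; i++) {
		val.orig_off = insn_offs[i]; /* offset in instructions, not bytes */
		err = bpf_map_update_elem(map_fd, &i, &val, 0);
		if (err)
			goto err_close;
	}

	/* the kernel requires an insn_array map to be frozen before use */
	err = bpf_map_freeze(map_fd);
	if (err)
		goto err_close;

	return map_fd;

err_close:
	close(map_fd);
	return err;
}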
|
||||
|
||||
/* Relocate data references within program code:
|
||||
* - map references;
|
||||
* - global variable references;
|
||||
@@ -6235,6 +6434,20 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
|
||||
case RELO_CORE:
|
||||
/* will be handled by bpf_program_record_relos() */
|
||||
break;
|
||||
case RELO_INSN_ARRAY: {
|
||||
int map_fd;
|
||||
|
||||
map_fd = create_jt_map(obj, prog, relo);
|
||||
if (map_fd < 0) {
|
||||
pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n",
|
||||
prog->name, i, relo->sym_off);
|
||||
return map_fd;
|
||||
}
|
||||
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
|
||||
insn->imm = map_fd;
|
||||
insn->off = 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
|
||||
prog->name, i, relo->type);
|
||||
@@ -6432,36 +6645,62 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog)
|
||||
{
|
||||
size_t size = sizeof(main_prog->subprogs[0]);
|
||||
int cnt = main_prog->subprog_cnt;
|
||||
void *tmp;
|
||||
|
||||
tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size);
|
||||
if (!tmp)
|
||||
return -ENOMEM;
|
||||
|
||||
main_prog->subprogs = tmp;
|
||||
main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off;
|
||||
main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off;
|
||||
main_prog->subprog_cnt++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
|
||||
struct bpf_program *subprog)
|
||||
{
|
||||
struct bpf_insn *insns;
size_t new_cnt;
int err;

subprog->sub_insn_off = main_prog->insns_cnt;

new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
if (!insns) {
pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
return -ENOMEM;
}
main_prog->insns = insns;
main_prog->insns_cnt = new_cnt;

memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
subprog->insns_cnt * sizeof(*insns));

pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
main_prog->name, subprog->insns_cnt, subprog->name);

/* The subprog insns are now appended. Append its relos too. */
err = append_subprog_relos(main_prog, subprog);
if (err)
return err;
|
||||
|
||||
err = save_subprog_offsets(main_prog, subprog);
|
||||
if (err) {
|
||||
pr_warn("prog '%s': failed to add subprog offsets: %s\n",
|
||||
main_prog->name, errstr(err));
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -9228,6 +9467,13 @@ void bpf_object__close(struct bpf_object *obj)
|
||||
|
||||
zfree(&obj->arena_data);
|
||||
|
||||
zfree(&obj->jumptables_data);
|
||||
obj->jumptables_data_sz = 0;
|
||||
|
||||
for (i = 0; i < obj->jumptable_map_cnt; i++)
|
||||
close(obj->jumptable_maps[i].fd);
|
||||
zfree(&obj->jumptable_maps);
|
||||
|
||||
free(obj);
|
||||
}
|
||||
|
||||
@@ -13854,8 +14100,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
|
||||
return libbpf_err(-EINVAL);
|
||||
|
||||
if (attach_prog_fd && !attach_func_name) {
|
||||
/* Store attach_prog_fd. The BTF ID will be resolved later during
* the normal object/program load phase.
|
||||
*/
|
||||
prog->attach_prog_fd = attach_prog_fd;
|
||||
return 0;
|
||||
|
||||
@@ -448,7 +448,7 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
|
||||
|
||||
/**
|
||||
* @brief **bpf_program__unpin()** unpins the BPF program from a file
|
||||
* in the BPFFS specified by a path. This decrements the program's in-kernel
|
||||
* reference count.
|
||||
*
|
||||
* The file pinning the BPF program can also be unlinked by a different
|
||||
@@ -481,14 +481,12 @@ LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
|
||||
|
||||
/**
|
||||
* @brief **bpf_link__unpin()** unpins the BPF link from a file
|
||||
* in the BPFFS. This decrements the link's in-kernel reference count.
|
||||
*
|
||||
* The file pinning the BPF link can also be unlinked by a different
|
||||
* process in which case this function will return an error.
|
||||
*
|
||||
* @param link BPF link to unpin
|
||||
* @return 0, on success; negative error code, otherwise
|
||||
*/
|
||||
LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
|
||||
@@ -995,8 +993,13 @@ LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog);
|
||||
* - fentry/fexit/fmod_ret;
|
||||
* - lsm;
|
||||
* - freplace.
|
||||
* @param prog BPF program to configure; must not be loaded yet.
|
||||
* @param attach_prog_fd FD of target BPF program (for freplace/extension).
|
||||
* If >0 and func name omitted, defers BTF ID resolution.
|
||||
* @param attach_func_name Target function name. Used either with
|
||||
* attach_prog_fd to find destination BTF type ID in that BPF program, or
|
||||
* alone (no attach_prog_fd) to resolve kernel (vmlinux/module) BTF ID.
|
||||
* Must be provided if attach_prog_fd is 0.
|
||||
* @return error code; or 0 if no error occurred.
|
||||
*/
|
||||
LIBBPF_API int
|
||||
@@ -1098,6 +1101,7 @@ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
|
||||
/**
|
||||
* @brief **bpf_map__set_value_size()** sets map value size.
|
||||
* @param map the BPF map instance
|
||||
* @param size the new value size
|
||||
* @return 0, on success; negative error, otherwise
|
||||
*
|
||||
* There is a special case for maps with associated memory-mapped regions, like
|
||||
@@ -1202,7 +1206,7 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
|
||||
* per-CPU values value size has to be aligned up to closest 8 bytes for
|
||||
* alignment reasons, so expected size is: `round_up(value_size, 8)
|
||||
* * libbpf_num_possible_cpus()`.
|
||||
* @param flags extra flags passed to kernel for this operation
|
||||
* @return 0, on success; negative error, otherwise
|
||||
*
|
||||
* **bpf_map__lookup_elem()** is high-level equivalent of
|
||||
@@ -1226,7 +1230,7 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
|
||||
* per-CPU values value size has to be aligned up to closest 8 bytes for
|
||||
* alignment reasons, so expected size is: `round_up(value_size, 8)
|
||||
* * libbpf_num_possible_cpus()`.
|
||||
* @param flags extra flags passed to kernel for this operation
|
||||
* @return 0, on success; negative error, otherwise
|
||||
*
|
||||
* **bpf_map__update_elem()** is high-level equivalent of
|
||||
@@ -1242,7 +1246,7 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map,
|
||||
* @param map BPF map to delete element from
|
||||
* @param key pointer to memory containing bytes of the key
|
||||
* @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
|
||||
* @param flags extra flags passed to kernel for this operation
|
||||
* @return 0, on success; negative error, otherwise
|
||||
*
|
||||
* **bpf_map__delete_elem()** is high-level equivalent of
|
||||
@@ -1265,7 +1269,7 @@ LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map,
|
||||
* per-CPU values value size has to be aligned up to closest 8 bytes for
|
||||
* alignment reasons, so expected size is: `round_up(value_size, 8)
|
||||
* * libbpf_num_possible_cpus()`.
|
||||
* @param flags extra flags passed to kernel for this operation
|
||||
* @return 0, on success; negative error, otherwise
|
||||
*
|
||||
* **bpf_map__lookup_and_delete_elem()** is high-level equivalent of
|
||||
@@ -1637,6 +1641,7 @@ struct perf_buffer_opts {
|
||||
* @param sample_cb function called on each received data record
|
||||
* @param lost_cb function called when record loss has occurred
|
||||
* @param ctx user-provided extra context passed into *sample_cb* and *lost_cb*
|
||||
* @param opts optional parameters for the perf buffer, can be null
|
||||
* @return a new instance of struct perf_buffer on success, NULL on error with
|
||||
* *errno* containing an error code
|
||||
*/
|
||||
|
||||
@@ -74,6 +74,8 @@
|
||||
#define ELF64_ST_VISIBILITY(o) ((o) & 0x03)
|
||||
#endif
|
||||
|
||||
#define JUMPTABLES_SEC ".jumptables"
|
||||
|
||||
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
|
||||
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
|
||||
#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
|
||||
|
||||
@@ -364,6 +364,10 @@ static int probe_map_create(enum bpf_map_type map_type)
|
||||
case BPF_MAP_TYPE_SOCKHASH:
|
||||
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
|
||||
break;
|
||||
case BPF_MAP_TYPE_INSN_ARRAY:
|
||||
key_size = sizeof(__u32);
|
||||
value_size = sizeof(struct bpf_insn_array_value);
|
||||
break;
|
||||
case BPF_MAP_TYPE_UNSPEC:
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
@@ -2025,6 +2025,9 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
|
||||
obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (strcmp(src_sec->sec_name, JUMPTABLES_SEC) == 0)
|
||||
goto add_sym;
|
||||
}
|
||||
|
||||
if (sym_bind == STB_LOCAL)
|
||||
|
||||
tools/testing/selftests/bpf/.gitignore
@@ -23,6 +23,7 @@ test_tcpnotify_user
|
||||
test_libbpf
|
||||
xdping
|
||||
test_cpp
|
||||
test_progs_verification_cert
|
||||
*.d
|
||||
*.subskel.h
|
||||
*.skel.h
|
||||
@@ -32,7 +33,6 @@ test_cpp
|
||||
/cpuv4
|
||||
/host-tools
|
||||
/tools
|
||||
/runqslower
|
||||
/bench
|
||||
/veristat
|
||||
/sign-file
|
||||
|
||||
@@ -46,6 +46,7 @@ endif
|
||||
|
||||
CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \
|
||||
-Wall -Werror -fno-omit-frame-pointer \
|
||||
-Wno-unused-but-set-variable \
|
||||
$(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
|
||||
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
|
||||
-I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT)
|
||||
@@ -98,14 +99,11 @@ TEST_GEN_PROGS += test_progs-cpuv4
|
||||
TEST_INST_SUBDIRS += cpuv4
|
||||
endif
|
||||
|
||||
TEST_GEN_FILES = test_tc_edt.bpf.o
|
||||
TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
|
||||
|
||||
# Order correspond to 'make run_tests' order
|
||||
TEST_PROGS := test_kmod.sh \
|
||||
test_lirc_mode2.sh \
|
||||
test_tc_tunnel.sh \
|
||||
test_tc_edt.sh \
|
||||
test_xdping.sh \
|
||||
test_bpftool_build.sh \
|
||||
test_bpftool.sh \
|
||||
@@ -127,7 +125,6 @@ TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))
|
||||
TEST_GEN_PROGS_EXTENDED = \
|
||||
bench \
|
||||
flow_dissector_load \
|
||||
runqslower \
|
||||
test_cpp \
|
||||
test_lirc_mode2_user \
|
||||
veristat \
|
||||
@@ -209,8 +206,6 @@ HOST_INCLUDE_DIR := $(INCLUDE_DIR)
|
||||
endif
|
||||
HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
|
||||
RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
|
||||
RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/
|
||||
|
||||
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
|
||||
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
|
||||
../../../../vmlinux \
|
||||
@@ -232,7 +227,7 @@ $(notdir $(TEST_GEN_PROGS) $(TEST_KMODS) \
|
||||
MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
|
||||
$(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool \
|
||||
$(HOST_BUILD_DIR)/resolve_btfids \
|
||||
$(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR))
|
||||
$(INCLUDE_DIR))
|
||||
$(MAKE_DIRS):
|
||||
$(call msg,MKDIR,,$@)
|
||||
$(Q)mkdir -p $@
|
||||
@@ -304,17 +299,6 @@ TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL)
|
||||
USE_BOOTSTRAP := "bootstrap/"
|
||||
endif
|
||||
|
||||
$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
|
||||
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
|
||||
OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \
|
||||
BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
|
||||
BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \
|
||||
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
|
||||
BPF_TARGET_ENDIAN=$(BPF_TARGET_ENDIAN) \
|
||||
EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
|
||||
EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \
|
||||
cp $(RUNQSLOWER_OUTPUT)runqslower $@
|
||||
|
||||
TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL)
|
||||
|
||||
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
|
||||
@@ -453,7 +437,9 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
|
||||
-I$(abspath $(OUTPUT)/../usr/include) \
|
||||
-std=gnu11 \
|
||||
-fno-strict-aliasing \
|
||||
-Wno-compare-distinct-pointer-types \
|
||||
-Wno-initializer-overrides \
|
||||
#
|
||||
# TODO: enable me -Wsign-compare
|
||||
|
||||
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES)
|
||||
@@ -498,7 +484,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
|
||||
|
||||
LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \
|
||||
core_kern.c core_kern_overflow.c test_ringbuf.c \
|
||||
test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \
|
||||
test_ringbuf_overwrite.c
|
||||
|
||||
LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c
|
||||
|
||||
@@ -543,6 +530,8 @@ TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \
|
||||
$$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c)))
|
||||
TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
|
||||
$$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
|
||||
TRUNNER_LIB_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
|
||||
$$(filter %.c,$(TRUNNER_LIB_SOURCES)))
|
||||
TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
|
||||
TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
|
||||
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
|
||||
@@ -686,6 +675,10 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
|
||||
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
|
||||
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
|
||||
|
||||
$(TRUNNER_LIB_OBJS): $(TRUNNER_OUTPUT)/%.o:$(TOOLSDIR)/lib/%.c
|
||||
$$(call msg,LIB-OBJ,$(TRUNNER_BINARY),$$@)
|
||||
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
|
||||
|
||||
# non-flavored in-srctree builds receive special treatment, in particular, we
|
||||
# do not need to copy extra resources (see e.g. test_btf_dump_case())
|
||||
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
|
||||
@@ -699,6 +692,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
|
||||
|
||||
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
|
||||
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
|
||||
$(TRUNNER_LIB_OBJS) \
|
||||
$(RESOLVE_BTFIDS) \
|
||||
$(TRUNNER_BPFTOOL) \
|
||||
$(OUTPUT)/veristat \
|
||||
@@ -721,7 +715,8 @@ $(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
|
||||
$(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
|
||||
|
||||
$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
|
||||
$(Q)ln -fs $< test_progs_verification_cert && \
|
||||
xxd -i test_progs_verification_cert > $@
|
||||
|
||||
# Define test_progs test runner.
|
||||
TRUNNER_TESTS_DIR := prog_tests
|
||||
@@ -745,6 +740,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
|
||||
$(VERIFY_SIG_HDR) \
|
||||
flow_dissector_load.h \
|
||||
ip_check_defrag_frags.h
|
||||
TRUNNER_LIB_SOURCES := find_bit.c
|
||||
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
|
||||
$(OUTPUT)/liburandom_read.so \
|
||||
$(OUTPUT)/xdp_synproxy \
|
||||
@@ -782,6 +778,7 @@ endif
|
||||
TRUNNER_TESTS_DIR := map_tests
|
||||
TRUNNER_BPF_PROGS_DIR := progs
|
||||
TRUNNER_EXTRA_SOURCES := test_maps.c
|
||||
TRUNNER_LIB_SOURCES :=
|
||||
TRUNNER_EXTRA_FILES :=
|
||||
TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
|
||||
TRUNNER_BPF_CFLAGS :=
|
||||
@@ -803,7 +800,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
|
||||
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
|
||||
|
||||
# Include find_bit.c to compile xskxceiver.
|
||||
EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c prog_tests/test_xsk.c prog_tests/test_xsk.h
|
||||
$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
|
||||
$(call msg,BINARY,,$@)
|
||||
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
|
||||
@@ -893,7 +890,8 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
|
||||
$(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \
|
||||
no_alu32 cpuv4 bpf_gcc \
|
||||
liburandom_read.so) \
|
||||
$(OUTPUT)/FEATURE-DUMP.selftests \
|
||||
test_progs_verification_cert
|
||||
|
||||
.PHONY: docs docs-clean
|
||||
|
||||
|
||||
@@ -19,6 +19,8 @@ static struct {
|
||||
int ringbuf_sz; /* per-ringbuf, in bytes */
|
||||
bool ringbuf_use_output; /* use slower output API */
|
||||
int perfbuf_sz; /* per-CPU size, in pages */
|
||||
bool overwrite;
|
||||
bool bench_producer;
|
||||
} args = {
|
||||
.back2back = false,
|
||||
.batch_cnt = 500,
|
||||
@@ -27,6 +29,8 @@ static struct {
|
||||
.ringbuf_sz = 512 * 1024,
|
||||
.ringbuf_use_output = false,
|
||||
.perfbuf_sz = 128,
|
||||
.overwrite = false,
|
||||
.bench_producer = false,
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -35,6 +39,8 @@ enum {
|
||||
ARG_RB_BATCH_CNT = 2002,
|
||||
ARG_RB_SAMPLED = 2003,
|
||||
ARG_RB_SAMPLE_RATE = 2004,
|
||||
ARG_RB_OVERWRITE = 2005,
|
||||
ARG_RB_BENCH_PRODUCER = 2006,
|
||||
};
|
||||
|
||||
static const struct argp_option opts[] = {
|
||||
@@ -43,6 +49,8 @@ static const struct argp_option opts[] = {
|
||||
{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
|
||||
{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
|
||||
{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
|
||||
{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
|
||||
{ "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
|
||||
{},
|
||||
};
|
||||
|
||||
@@ -72,6 +80,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
|
||||
argp_usage(state);
|
||||
}
|
||||
break;
|
||||
case ARG_RB_OVERWRITE:
|
||||
args.overwrite = true;
|
||||
break;
|
||||
case ARG_RB_BENCH_PRODUCER:
|
||||
args.bench_producer = true;
|
||||
break;
|
||||
default:
|
||||
return ARGP_ERR_UNKNOWN;
|
||||
}
|
||||
@@ -95,8 +109,33 @@ static inline void bufs_trigger_batch(void)
|
||||
|
||||
static void bufs_validate(void)
|
||||
{
|
||||
if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (args.overwrite && !args.bench_producer) {
|
||||
fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (args.bench_producer && env.consumer_cnt != 0) {
|
||||
fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (args.bench_producer && args.back2back) {
|
||||
fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (args.bench_producer && args.sampled) {
|
||||
fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (!args.bench_producer && env.consumer_cnt != 1) {
|
||||
fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@@ -128,12 +167,17 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
|
||||
{
|
||||
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
|
||||
|
||||
if (args.bench_producer)
|
||||
res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
|
||||
else
|
||||
res->hits = atomic_swap(&buf_hits.value, 0);
|
||||
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
|
||||
}
|
||||
|
||||
static struct ringbuf_bench *ringbuf_setup_skeleton(void)
|
||||
{
|
||||
__u32 flags;
|
||||
struct bpf_map *ringbuf;
|
||||
struct ringbuf_bench *skel;
|
||||
|
||||
setup_libbpf();
|
||||
@@ -146,12 +190,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
|
||||
|
||||
skel->rodata->batch_cnt = args.batch_cnt;
|
||||
skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
|
||||
skel->rodata->bench_producer = args.bench_producer;
|
||||
|
||||
if (args.sampled)
|
||||
/* record data + header take 16 bytes */
|
||||
skel->rodata->wakeup_data_size = args.sample_rate * 16;
|
||||
|
||||
ringbuf = skel->maps.ringbuf;
|
||||
if (args.overwrite) {
|
||||
flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
|
||||
bpf_map__set_map_flags(ringbuf, flags);
|
||||
}
|
||||
|
||||
bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
|
||||
|
||||
if (ringbuf_bench__load(skel)) {
|
||||
fprintf(stderr, "failed to load skeleton\n");
|
||||
@@ -171,10 +222,12 @@ static void ringbuf_libbpf_setup(void)
|
||||
{
|
||||
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
|
||||
struct bpf_link *link;
|
||||
int map_fd;
|
||||
|
||||
ctx->skel = ringbuf_setup_skeleton();
|
||||
ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
|
||||
buf_process_sample, NULL, NULL);
|
||||
|
||||
map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
|
||||
ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
|
||||
if (!ctx->ringbuf) {
|
||||
fprintf(stderr, "failed to create ringbuf\n");
|
||||
exit(1);
|
||||
|
||||
@@ -180,10 +180,10 @@ static void trigger_kernel_count_setup(void)
|
||||
{
|
||||
setup_ctx();
|
||||
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
|
||||
bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
|
||||
bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true);
|
||||
load_ctx();
|
||||
/* override driver program */
|
||||
ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count);
|
||||
}
|
||||
|
||||
static void trigger_kprobe_setup(void)
|
||||
|
||||
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
|
||||
summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
|
||||
done
|
||||
|
||||
header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
|
||||
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
|
||||
summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
|
||||
done
|
||||
|
||||
@@ -64,14 +64,12 @@ static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
|
||||
|
||||
static inline void __list_del(arena_list_node_t *n)
|
||||
{
|
||||
arena_list_node_t *next = n->next;
|
||||
arena_list_node_t * __arena *pprev = n->pprev;
|
||||
|
||||
cast_user(next);
|
||||
cast_kern(pprev);
|
||||
WRITE_ONCE(*pprev, next);
|
||||
if (next) {
|
||||
cast_user(pprev);
|
||||
cast_kern(next);
|
||||
|
||||
tools/testing/selftests/bpf/bpf_arena_strsearch.h (new file)
@@ -0,0 +1,128 @@
|
||||
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
|
||||
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
|
||||
#pragma once
|
||||
#include "bpf_arena_common.h"
|
||||
|
||||
__noinline int bpf_arena_strlen(const char __arena *s __arg_arena)
|
||||
{
|
||||
const char __arena *sc;
|
||||
|
||||
for (sc = s; *sc != '\0'; ++sc)
|
||||
cond_break;
|
||||
return sc - s;
|
||||
}
|
||||
|
||||
/**
|
||||
* glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
|
||||
* @pat: Shell-style pattern to match, e.g. "*.[ch]".
|
||||
* @str: String to match. The pattern must match the entire string.
|
||||
*
|
||||
* Perform shell-style glob matching, returning true (1) if the match
|
||||
* succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
|
||||
*
|
||||
* Pattern metacharacters are ?, *, [ and \.
|
||||
* (And, inside character classes, !, - and ].)
|
||||
*
|
||||
* This is small and simple implementation intended for device blacklists
|
||||
* where a string is matched against a number of patterns. Thus, it
|
||||
* does not preprocess the patterns. It is non-recursive, and run-time
|
||||
* is at most quadratic: strlen(@str)*strlen(@pat).
|
||||
*
|
||||
* An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
|
||||
* it takes 6 passes over the pattern before matching the string.
|
||||
*
|
||||
* Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
|
||||
* treat / or leading . specially; it isn't actually used for pathnames.
|
||||
*
|
||||
* Note that according to glob(7) (and unlike bash), character classes
|
||||
* are complemented by a leading !; this does not support the regex-style
|
||||
* [^a-z] syntax.
|
||||
*
|
||||
* An opening bracket without a matching close is matched literally.
|
||||
*/
|
||||
__noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena)
|
||||
{
|
||||
/*
|
||||
* Backtrack to previous * on mismatch and retry starting one
|
||||
* character later in the string. Because * matches all characters
|
||||
* (no exception for /), it can be easily proved that there's
|
||||
* never a need to backtrack multiple levels.
|
||||
*/
|
||||
char const __arena *back_pat = NULL, *back_str;
|
||||
|
||||
/*
|
||||
* Loop over each token (character or class) in pat, matching
|
||||
* it against the remaining unmatched tail of str. Return false
|
||||
* on mismatch, or true after matching the trailing nul bytes.
|
||||
*/
|
||||
for (;;) {
|
||||
unsigned char c = *str++;
|
||||
unsigned char d = *pat++;
|
||||
|
||||
switch (d) {
|
||||
case '?': /* Wildcard: anything but nul */
|
||||
if (c == '\0')
|
||||
return false;
|
||||
break;
|
||||
case '*': /* Any-length wildcard */
|
||||
if (*pat == '\0') /* Optimize trailing * case */
|
||||
return true;
|
||||
back_pat = pat;
|
||||
back_str = --str; /* Allow zero-length match */
|
||||
break;
|
||||
case '[': { /* Character class */
|
||||
bool match = false, inverted = (*pat == '!');
|
||||
char const __arena *class = pat + inverted;
|
||||
unsigned char a = *class++;
|
||||
|
||||
/*
|
||||
* Iterate over each span in the character class.
|
||||
* A span is either a single character a, or a
|
||||
* range a-b. The first span may begin with ']'.
|
||||
*/
|
||||
do {
|
||||
unsigned char b = a;
|
||||
|
||||
if (a == '\0') /* Malformed */
|
||||
goto literal;
|
||||
|
||||
if (class[0] == '-' && class[1] != ']') {
|
||||
b = class[1];
|
||||
|
||||
if (b == '\0')
|
||||
goto literal;
|
||||
|
||||
class += 2;
|
||||
/* Any special action if a > b? */
|
||||
}
|
||||
match |= (a <= c && c <= b);
|
||||
cond_break;
|
||||
} while ((a = *class++) != ']');
|
||||
|
||||
if (match == inverted)
|
||||
goto backtrack;
|
||||
pat = class;
|
||||
}
|
||||
break;
|
||||
case '\\':
|
||||
d = *pat++;
|
||||
__attribute__((__fallthrough__));
|
||||
default: /* Literal character */
|
||||
literal:
|
||||
if (c == d) {
|
||||
if (d == '\0')
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
backtrack:
|
||||
if (c == '\0' || !back_pat)
|
||||
return false; /* No point continuing */
|
||||
/* Try again from last *, one character later in str. */
|
||||
pat = back_pat;
|
||||
str = ++back_str;
|
||||
break;
|
||||
}
|
||||
cond_break;
|
||||
}
|
||||
return false;
|
||||
}
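A few illustrative calls to make the matching rules above concrete. Expected results follow from the glob(7)-style semantics documented in the comment; the __arena address-space qualifiers are elided here for brevity, and these lines are not part of the patch.

/* Illustrative expectations only, not part of the patch. */
glob_match("*.[ch]", "main.c"); /* true: '*' then a literal '.' and class [ch] */
glob_match("a?c", "abc");       /* true: '?' matches exactly one character */
glob_match("[!0-9]x", "ax");    /* true: '!' complements the class */
glob_match("*aaaaa", "aaaa");   /* false: the string is one 'a' short */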
|
||||
@@ -28,8 +28,8 @@ extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
|
||||
* Either a direct pointer to the dynptr data or a pointer to the user-provided
|
||||
* buffer if unable to obtain a direct pointer
|
||||
*/
|
||||
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset,
void *buffer, __u64 buffer__szk) __ksym __weak;
|
||||
|
||||
/* Description
|
||||
* Obtain a read-write pointer to the dynptr's data
|
||||
@@ -37,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
|
||||
* Either a direct pointer to the dynptr data or a pointer to the user-provided
|
||||
* buffer if unable to obtain a direct pointer
|
||||
*/
|
||||
extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer,
__u64 buffer__szk) __ksym __weak;
|
||||
|
||||
extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak;
|
||||
extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
|
||||
extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
|
||||
extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
|
||||
extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
|
||||
|
||||
/* Description
|
||||
|
||||
@@ -97,7 +97,7 @@ int settimeo(int fd, int timeout_ms)
|
||||
int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
|
||||
const struct network_helper_opts *opts)
|
||||
{
|
||||
int on = 1, fd;
|
||||
|
||||
if (!opts)
|
||||
opts = &default_opts;
|
||||
@@ -111,6 +111,12 @@ int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t a
|
||||
if (settimeo(fd, opts->timeout_ms))
|
||||
goto error_close;
|
||||
|
||||
if (type == SOCK_STREAM &&
|
||||
setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) {
|
||||
log_err("Failed to enable SO_REUSEADDR");
|
||||
goto error_close;
|
||||
}
|
||||
|
||||
if (opts->post_socket_cb &&
|
||||
opts->post_socket_cb(fd, opts->cb_opts)) {
|
||||
log_err("Failed to call post_socket_cb");
|
||||
@@ -766,6 +772,50 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)
|
||||
return err;
|
||||
}
|
||||
|
||||
int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
|
||||
{
|
||||
int ifindex, ret;
|
||||
|
||||
if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
|
||||
"at least one program fd is valid"))
|
||||
return -1;
|
||||
|
||||
ifindex = if_nametoindex(dev);
|
||||
if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
|
||||
return -1;
|
||||
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
|
||||
.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
|
||||
.priority = 1, .prog_fd = ingress_fd);
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
|
||||
.priority = 1, .prog_fd = egress_fd);
|
||||
|
||||
ret = bpf_tc_hook_create(&hook);
|
||||
if (!ASSERT_OK(ret, "create tc hook"))
|
||||
return ret;
|
||||
|
||||
if (ingress_fd >= 0) {
|
||||
hook.attach_point = BPF_TC_INGRESS;
|
||||
ret = bpf_tc_attach(&hook, &opts1);
|
||||
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
|
||||
bpf_tc_hook_destroy(&hook);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (egress_fd >= 0) {
|
||||
hook.attach_point = BPF_TC_EGRESS;
|
||||
ret = bpf_tc_attach(&hook, &opts2);
|
||||
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
|
||||
bpf_tc_hook_destroy(&hook);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef TRAFFIC_MONITOR
|
||||
struct tmonitor_ctx {
|
||||
pcap_t *pcap;
|
||||
|
||||
@@ -255,6 +255,22 @@ struct tmonitor_ctx;
|
||||
|
||||
typedef int (*tm_print_fn_t)(const char *format, va_list args);
|
||||
|
||||
/**
|
||||
* tc_prog_attach - attach BPF program(s) to an interface
|
||||
*
|
||||
* Takes file descriptors for at least one, and at most two, BPF
* programs, and attaches them to an interface's ingress, egress, or
|
||||
* both.
|
||||
*
|
||||
* @dev: string containing the interface name
|
||||
* @ingress_fd: file descriptor of the program to attach to interface ingress
|
||||
* @egress_fd: file descriptor of the program to attach to interface egress
|
||||
*
|
||||
* Returns 0 on success, -1 if no valid file descriptor has been found, if
|
||||
* the interface name is invalid, or if an error occurred during attach.
|
||||
*/
|
||||
int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd);
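A usage sketch for the helper declared above, with assumed names (the interface "veth0" and the skeleton program field are hypothetical, not taken from the patch):

/* Hypothetical caller: attach a single program to egress only; passing -1
 * for the unused direction skips that attach point, per the contract above.
 */
int egress_fd = bpf_program__fd(skel->progs.egress_prog); /* assumed skeleton */

if (tc_prog_attach("veth0", -1, egress_fd))
	return; /* the helper already reported the failure via ASSERT_*() */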
|
||||
|
||||
#ifdef TRAFFIC_MONITOR
|
||||
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
|
||||
const char *subtest_name);
|
||||
|
||||
tools/testing/selftests/bpf/prog_tests/arena_strsearch.c (new file)
@@ -0,0 +1,30 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
|
||||
#include <test_progs.h>
|
||||
#include "arena_strsearch.skel.h"
|
||||
|
||||
static void test_arena_str(void)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_test_run_opts, opts);
|
||||
struct arena_strsearch *skel;
|
||||
int ret;
|
||||
|
||||
skel = arena_strsearch__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load"))
|
||||
return;
|
||||
|
||||
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &opts);
|
||||
ASSERT_OK(ret, "ret_add");
|
||||
ASSERT_OK(opts.retval, "retval");
|
||||
if (skel->bss->skip) {
|
||||
printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
|
||||
test__skip();
|
||||
}
|
||||
arena_strsearch__destroy(skel);
|
||||
}
|
||||
|
||||
void test_arena_strsearch(void)
|
||||
{
|
||||
if (test__start_subtest("arena_strsearch"))
|
||||
test_arena_str();
|
||||
}
|
||||
tools/testing/selftests/bpf/prog_tests/bpf_gotox.c (new file)
@@ -0,0 +1,292 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <test_progs.h>
|
||||
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/in6.h>
|
||||
#include <linux/udp.h>
|
||||
#include <linux/tcp.h>
|
||||
|
||||
#include <sys/syscall.h>
|
||||
#include <bpf/bpf.h>
|
||||
|
||||
#include "bpf_gotox.skel.h"
|
||||
|
||||
static void __test_run(struct bpf_program *prog, void *ctx_in, size_t ctx_size_in)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_test_run_opts, topts,
|
||||
.ctx_in = ctx_in,
|
||||
.ctx_size_in = ctx_size_in,
|
||||
);
|
||||
int err, prog_fd;
|
||||
|
||||
prog_fd = bpf_program__fd(prog);
|
||||
err = bpf_prog_test_run_opts(prog_fd, &topts);
|
||||
ASSERT_OK(err, "test_run_opts err");
|
||||
}
|
||||
|
||||
static void __subtest(struct bpf_gotox *skel, void (*check)(struct bpf_gotox *))
|
||||
{
|
||||
if (skel->data->skip)
|
||||
test__skip();
|
||||
else
|
||||
check(skel);
|
||||
}
|
||||
|
||||
static void check_simple(struct bpf_gotox *skel,
|
||||
struct bpf_program *prog,
|
||||
__u64 ctx_in,
|
||||
__u64 expected)
|
||||
{
|
||||
skel->bss->ret_user = 0;
|
||||
|
||||
__test_run(prog, &ctx_in, sizeof(ctx_in));
|
||||
|
||||
if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
|
||||
return;
|
||||
}
|
||||
|
||||
static void check_simple_fentry(struct bpf_gotox *skel,
|
||||
struct bpf_program *prog,
|
||||
__u64 ctx_in,
|
||||
__u64 expected)
|
||||
{
|
||||
skel->bss->in_user = ctx_in;
|
||||
skel->bss->ret_user = 0;
|
||||
|
||||
/* trigger */
|
||||
usleep(1);
|
||||
|
||||
if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
|
||||
return;
|
||||
}
|
||||
|
||||
/* validate that for two loads of the same jump table libbpf generates only one map */
|
||||
static void check_one_map_two_jumps(struct bpf_gotox *skel)
|
||||
{
|
||||
struct bpf_prog_info prog_info;
|
||||
struct bpf_map_info map_info;
|
||||
__u32 len;
|
||||
__u32 map_ids[16];
|
||||
int prog_fd, map_fd;
|
||||
int ret;
|
||||
int i;
|
||||
bool seen = false;
|
||||
|
||||
memset(&prog_info, 0, sizeof(prog_info));
|
||||
prog_info.map_ids = (long)map_ids;
|
||||
prog_info.nr_map_ids = ARRAY_SIZE(map_ids);
|
||||
prog_fd = bpf_program__fd(skel->progs.one_map_two_jumps);
|
||||
if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd(one_map_two_jumps)"))
|
||||
return;
|
||||
|
||||
len = sizeof(prog_info);
|
||||
ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &len);
|
||||
if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(prog_fd)"))
|
||||
return;
|
||||
|
||||
for (i = 0; i < prog_info.nr_map_ids; i++) {
|
||||
map_fd = bpf_map_get_fd_by_id(map_ids[i]);
|
||||
if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
|
||||
return;
|
||||
|
||||
len = sizeof(map_info);
|
||||
memset(&map_info, 0, len);
|
||||
ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &len);
|
||||
if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(map_fd)")) {
|
||||
close(map_fd);
|
||||
return;
|
||||
}
|
||||
|
||||
if (map_info.type == BPF_MAP_TYPE_INSN_ARRAY) {
|
||||
if (!ASSERT_EQ(seen, false, "more than one INSN_ARRAY map")) {
|
||||
close(map_fd);
|
||||
return;
|
||||
}
|
||||
seen = true;
|
||||
}
|
||||
close(map_fd);
|
||||
}
|
||||
|
||||
ASSERT_EQ(seen, true, "no INSN_ARRAY map");
|
||||
}
|
||||
|
||||
static void check_one_switch(struct bpf_gotox *skel)
|
||||
{
|
||||
__u64 in[] = {0, 1, 2, 3, 4, 5, 77};
|
||||
__u64 out[] = {2, 3, 4, 5, 7, 19, 19};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple(skel, skel->progs.one_switch, in[i], out[i]);
|
||||
}
|
||||
|
||||
static void check_one_switch_non_zero_sec_off(struct bpf_gotox *skel)
|
||||
{
|
||||
__u64 in[] = {0, 1, 2, 3, 4, 5, 77};
|
||||
__u64 out[] = {2, 3, 4, 5, 7, 19, 19};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple(skel, skel->progs.one_switch_non_zero_sec_off, in[i], out[i]);
|
||||
}
|
||||
|
||||
static void check_two_switches(struct bpf_gotox *skel)
|
||||
{
|
||||
__u64 in[] = {0, 1, 2, 3, 4, 5, 77};
|
||||
__u64 out[] = {103, 104, 107, 205, 115, 1019, 1019};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple(skel, skel->progs.two_switches, in[i], out[i]);
|
||||
}
|
||||
|
||||
static void check_big_jump_table(struct bpf_gotox *skel)
|
||||
{
|
||||
__u64 in[] = {0, 11, 27, 31, 22, 45, 99};
|
||||
__u64 out[] = {2, 3, 4, 5, 19, 19, 19};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple(skel, skel->progs.big_jump_table, in[i], out[i]);
|
||||
}
|
||||
|
||||
static void check_one_jump_two_maps(struct bpf_gotox *skel)
|
||||
{
|
||||
__u64 in[] = {0, 1, 2, 3, 4, 5, 77};
|
||||
__u64 out[] = {12, 15, 7, 15, 12, 15, 15};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple(skel, skel->progs.one_jump_two_maps, in[i], out[i]);
|
||||
}
|
||||
|
||||
static void check_static_global(struct bpf_gotox *skel)
|
||||
{
|
||||
__u64 in[] = {0, 1, 2, 3, 4, 5, 77};
|
||||
__u64 out[] = {2, 3, 4, 5, 7, 19, 19};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple(skel, skel->progs.use_static_global1, in[i], out[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple(skel, skel->progs.use_static_global2, in[i], out[i]);
|
||||
}
|
||||
|
||||
static void check_nonstatic_global(struct bpf_gotox *skel)
|
||||
{
|
||||
__u64 in[] = {0, 1, 2, 3, 4, 5, 77};
|
||||
__u64 out[] = {2, 3, 4, 5, 7, 19, 19};
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple(skel, skel->progs.use_nonstatic_global1, in[i], out[i]);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple(skel, skel->progs.use_nonstatic_global2, in[i], out[i]);
|
||||
}
|
||||
|
||||
static void check_other_sec(struct bpf_gotox *skel)
|
||||
{
|
||||
struct bpf_link *link;
|
||||
__u64 in[] = {0, 1, 2, 3, 4, 5, 77};
|
||||
__u64 out[] = {2, 3, 4, 5, 7, 19, 19};
|
||||
int i;
|
||||
|
||||
link = bpf_program__attach(skel->progs.simple_test_other_sec);
|
||||
if (!ASSERT_OK_PTR(link, "link"))
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple_fentry(skel, skel->progs.simple_test_other_sec, in[i], out[i]);
|
||||
|
||||
bpf_link__destroy(link);
|
||||
}
|
||||
|
||||
static void check_static_global_other_sec(struct bpf_gotox *skel)
|
||||
{
|
||||
struct bpf_link *link;
|
||||
__u64 in[] = {0, 1, 2, 3, 4, 5, 77};
|
||||
__u64 out[] = {2, 3, 4, 5, 7, 19, 19};
|
||||
int i;
|
||||
|
||||
link = bpf_program__attach(skel->progs.use_static_global_other_sec);
|
||||
if (!ASSERT_OK_PTR(link, "link"))
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple_fentry(skel, skel->progs.use_static_global_other_sec, in[i], out[i]);
|
||||
|
||||
bpf_link__destroy(link);
|
||||
}
|
||||
|
||||
static void check_nonstatic_global_other_sec(struct bpf_gotox *skel)
|
||||
{
|
||||
struct bpf_link *link;
|
||||
__u64 in[] = {0, 1, 2, 3, 4, 5, 77};
|
||||
__u64 out[] = {2, 3, 4, 5, 7, 19, 19};
|
||||
int i;
|
||||
|
||||
link = bpf_program__attach(skel->progs.use_nonstatic_global_other_sec);
|
||||
if (!ASSERT_OK_PTR(link, "link"))
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(in); i++)
|
||||
check_simple_fentry(skel, skel->progs.use_nonstatic_global_other_sec, in[i], out[i]);
|
||||
|
||||
bpf_link__destroy(link);
|
||||
}
|
||||
|
||||
void test_bpf_gotox(void)
|
||||
{
|
||||
struct bpf_gotox *skel;
|
||||
int ret;
|
||||
|
||||
skel = bpf_gotox__open();
|
||||
if (!ASSERT_NEQ(skel, NULL, "bpf_gotox__open"))
|
||||
return;
|
||||
|
||||
ret = bpf_gotox__load(skel);
|
||||
if (!ASSERT_OK(ret, "bpf_gotox__load"))
|
||||
return;
|
||||
|
||||
skel->bss->pid = getpid();
|
||||
|
||||
if (test__start_subtest("one-switch"))
|
||||
__subtest(skel, check_one_switch);
|
||||
|
||||
if (test__start_subtest("one-switch-non-zero-sec-offset"))
|
||||
__subtest(skel, check_one_switch_non_zero_sec_off);
|
||||
|
||||
if (test__start_subtest("two-switches"))
|
||||
__subtest(skel, check_two_switches);
|
||||
|
||||
if (test__start_subtest("big-jump-table"))
|
||||
__subtest(skel, check_big_jump_table);
|
||||
|
||||
if (test__start_subtest("static-global"))
|
||||
__subtest(skel, check_static_global);
|
||||
|
||||
if (test__start_subtest("nonstatic-global"))
|
||||
__subtest(skel, check_nonstatic_global);
|
||||
|
||||
if (test__start_subtest("other-sec"))
|
||||
__subtest(skel, check_other_sec);
|
||||
|
||||
if (test__start_subtest("static-global-other-sec"))
|
||||
__subtest(skel, check_static_global_other_sec);
|
||||
|
||||
if (test__start_subtest("nonstatic-global-other-sec"))
|
||||
__subtest(skel, check_nonstatic_global_other_sec);
|
||||
|
||||
if (test__start_subtest("one-jump-two-maps"))
|
||||
__subtest(skel, check_one_jump_two_maps);
|
||||
|
||||
if (test__start_subtest("one-map-two-jumps"))
|
||||
__subtest(skel, check_one_map_two_jumps);
|
||||
|
||||
bpf_gotox__destroy(skel);
|
||||
}
|
||||
tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c (new file)
@@ -0,0 +1,504 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <bpf/bpf.h>
|
||||
#include <test_progs.h>
|
||||
|
||||
#ifdef __x86_64__
|
||||
static int map_create(__u32 map_type, __u32 max_entries)
|
||||
{
|
||||
const char *map_name = "insn_array";
|
||||
__u32 key_size = 4;
|
||||
__u32 value_size = sizeof(struct bpf_insn_array_value);
|
||||
|
||||
return bpf_map_create(map_type, map_name, key_size, value_size, max_entries, NULL);
|
||||
}
|
||||
|
||||
static int prog_load(struct bpf_insn *insns, __u32 insn_cnt, int *fd_array, __u32 fd_array_cnt)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_prog_load_opts, opts);
|
||||
|
||||
opts.fd_array = fd_array;
|
||||
opts.fd_array_cnt = fd_array_cnt;
|
||||
|
||||
return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, &opts);
|
||||
}
|
||||
|
||||
static void __check_success(struct bpf_insn *insns, __u32 insn_cnt, __u32 *map_in, __u32 *map_out)
|
||||
{
|
||||
struct bpf_insn_array_value val = {};
|
||||
int prog_fd = -1, map_fd, i;
|
||||
|
||||
map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, insn_cnt);
|
||||
if (!ASSERT_GE(map_fd, 0, "map_create"))
|
||||
return;
|
||||
|
||||
for (i = 0; i < insn_cnt; i++) {
|
||||
val.orig_off = map_in[i];
|
||||
if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
|
||||
goto cleanup;
|
||||
|
||||
prog_fd = prog_load(insns, insn_cnt, &map_fd, 1);
|
||||
if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
|
||||
goto cleanup;
|
||||
|
||||
for (i = 0; i < insn_cnt; i++) {
|
||||
char buf[64];
|
||||
|
||||
if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
|
||||
goto cleanup;
|
||||
|
||||
snprintf(buf, sizeof(buf), "val.xlated_off should be equal map_out[%d]", i);
|
||||
ASSERT_EQ(val.xlated_off, map_out[i], buf);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
close(prog_fd);
|
||||
close(map_fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Load a program that will not be mangled in any way by the verifier. Add an
|
||||
* insn_array map pointing to every instruction. Check that it hasn't changed
|
||||
* after the program load.
|
||||
*/
|
||||
static void check_one_to_one_mapping(void)
|
||||
{
|
||||
struct bpf_insn insns[] = {
|
||||
BPF_MOV64_IMM(BPF_REG_0, 4),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 3),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 2),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
__u32 map_in[] = {0, 1, 2, 3, 4, 5};
|
||||
__u32 map_out[] = {0, 1, 2, 3, 4, 5};
|
||||
|
||||
__check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
|
||||
}
|
||||
|
||||
/*
|
||||
* Load a program with two patches (get jiffies, for simplicity). Add an
|
||||
* insn_array map pointing to every instruction. Check how it was changed
|
||||
* after the program load.
|
||||
*/
|
||||
static void check_simple(void)
|
||||
{
|
||||
struct bpf_insn insns[] = {
|
||||
BPF_MOV64_IMM(BPF_REG_0, 2),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
__u32 map_in[] = {0, 1, 2, 3, 4, 5};
|
||||
__u32 map_out[] = {0, 1, 4, 5, 8, 9};
|
||||
|
||||
__check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verifier can delete code in two cases: nops & dead code. From insn
|
||||
* array's point of view, the two cases are the same, so test using
|
||||
* the simplest method: by loading some nops
|
||||
*/
|
||||
static void check_deletions(void)
|
||||
{
|
||||
struct bpf_insn insns[] = {
|
||||
BPF_MOV64_IMM(BPF_REG_0, 2),
|
||||
BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
__u32 map_in[] = {0, 1, 2, 3, 4, 5};
|
||||
__u32 map_out[] = {0, -1, 1, -1, 2, 3};
|
||||
|
||||
__check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
|
||||
}
|
||||
|
||||
/*
|
||||
* Same test as check_deletions, but also add code which adds instructions
|
||||
*/
|
||||
static void check_deletions_with_functions(void)
|
||||
{
|
||||
struct bpf_insn insns[] = {
|
||||
BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
|
||||
BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
BPF_EXIT_INSN(),
|
||||
BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
|
||||
BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 2),
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
__u32 map_in[] = { 0, 1, 2, 3, 4, 5, /* func */ 6, 7, 8, 9, 10};
|
||||
__u32 map_out[] = {-1, 0, -1, 3, 4, 5, /* func */ -1, 6, -1, 9, 10};
|
||||
|
||||
__check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to load a program with a map which points to outside of the program
|
||||
*/
|
||||
static void check_out_of_bounds_index(void)
|
||||
{
|
||||
struct bpf_insn insns[] = {
|
||||
BPF_MOV64_IMM(BPF_REG_0, 4),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 3),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 2),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
int prog_fd, map_fd;
|
||||
struct bpf_insn_array_value val = {};
|
||||
int key;
|
||||
|
||||
map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
|
||||
if (!ASSERT_GE(map_fd, 0, "map_create"))
|
||||
return;
|
||||
|
||||
key = 0;
|
||||
val.orig_off = ARRAY_SIZE(insns); /* too big */
|
||||
if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
|
||||
goto cleanup;
|
||||
|
||||
if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
|
||||
goto cleanup;
|
||||
|
||||
prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
|
||||
if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
|
||||
close(prog_fd);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
close(map_fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to load a program with a map which points to the middle of 16-bit insn
|
||||
*/
|
||||
static void check_mid_insn_index(void)
|
||||
{
|
||||
struct bpf_insn insns[] = {
|
||||
BPF_LD_IMM64(BPF_REG_0, 0), /* 2 x 8 */
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
int prog_fd, map_fd;
|
||||
struct bpf_insn_array_value val = {};
|
||||
int key;
|
||||
|
||||
map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
|
||||
if (!ASSERT_GE(map_fd, 0, "map_create"))
|
||||
return;
|
||||
|
||||
key = 0;
|
||||
val.orig_off = 1; /* middle of 16-byte instruction */
|
||||
if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
|
||||
goto cleanup;
|
||||
|
||||
if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
|
||||
goto cleanup;
|
||||
|
||||
prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
|
||||
if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
|
||||
close(prog_fd);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
close(map_fd);
|
||||
}
|
||||
|
||||
static void check_incorrect_index(void)
|
||||
{
|
||||
check_out_of_bounds_index();
|
||||
check_mid_insn_index();
|
||||
}
|
||||
|
||||
static int set_bpf_jit_harden(char *level)
|
||||
{
|
||||
char old_level;
|
||||
int err = -1;
|
||||
int fd = -1;
|
||||
|
||||
fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR | O_NONBLOCK);
|
||||
if (fd < 0) {
|
||||
ASSERT_FAIL("open .../bpf_jit_harden returned %d (errno=%d)", fd, errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = read(fd, &old_level, 1);
|
||||
if (err != 1) {
|
||||
ASSERT_FAIL("read from .../bpf_jit_harden returned %d (errno=%d)", err, errno);
|
||||
err = -1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
lseek(fd, 0, SEEK_SET);
|
||||
|
||||
err = write(fd, level, 1);
|
||||
if (err != 1) {
|
||||
ASSERT_FAIL("write to .../bpf_jit_harden returned %d (errno=%d)", err, errno);
|
||||
err = -1;
|
||||
goto end;
|
||||
}
|
||||
|
||||
err = 0;
|
||||
*level = old_level;
|
||||
end:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void check_blindness(void)
|
||||
{
|
||||
struct bpf_insn insns[] = {
|
||||
BPF_MOV64_IMM(BPF_REG_0, 4),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 3),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 2),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
int prog_fd = -1, map_fd;
|
||||
struct bpf_insn_array_value val = {};
|
||||
char bpf_jit_harden = '@'; /* non-existing value */
|
||||
int i;
|
||||
|
||||
map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
|
||||
if (!ASSERT_GE(map_fd, 0, "map_create"))
|
||||
return;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(insns); i++) {
|
||||
val.orig_off = i;
|
||||
if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
|
||||
goto cleanup;
|
||||
|
||||
bpf_jit_harden = '2';
|
||||
if (set_bpf_jit_harden(&bpf_jit_harden)) {
|
||||
bpf_jit_harden = '@'; /* open, read or write failed => no write was done */
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
|
||||
if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
|
||||
goto cleanup;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(insns); i++) {
|
||||
char fmt[32];
|
||||
|
||||
if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
|
||||
goto cleanup;
|
||||
|
||||
snprintf(fmt, sizeof(fmt), "val should be equal 3*%d", i);
|
||||
ASSERT_EQ(val.xlated_off, i * 3, fmt);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
/* restore the old one */
|
||||
if (bpf_jit_harden != '@')
|
||||
set_bpf_jit_harden(&bpf_jit_harden);
|
||||
|
||||
close(prog_fd);
|
||||
close(map_fd);
|
||||
}
|
||||
|
||||
/* Once the map has been initialized, it must be frozen */
static void check_load_unfrozen_map(void)
{
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	int prog_fd = -1, map_fd;
	struct bpf_insn_array_value val = {};
	int i;

	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
	if (!ASSERT_GE(map_fd, 0, "map_create"))
		return;

	for (i = 0; i < ARRAY_SIZE(insns); i++) {
		val.orig_off = i;
		if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
			goto cleanup;
	}

	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
	if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
		goto cleanup;

	/* correctness: now freeze the map, the program should load fine */

	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
		goto cleanup;

	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
	if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(insns); i++) {
		if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
			goto cleanup;

		ASSERT_EQ(val.xlated_off, i, "val should equal i");
	}

cleanup:
	close(prog_fd);
	close(map_fd);
}

/* A map can be used by only one BPF program */
static void check_no_map_reuse(void)
{
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	int prog_fd = -1, map_fd, extra_fd = -1;
	struct bpf_insn_array_value val = {};
	int i;

	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
	if (!ASSERT_GE(map_fd, 0, "map_create"))
		return;

	for (i = 0; i < ARRAY_SIZE(insns); i++) {
		val.orig_off = i;
		if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
			goto cleanup;
	}

	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
		goto cleanup;

	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
	if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(insns); i++) {
		if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
			goto cleanup;

		ASSERT_EQ(val.xlated_off, i, "val should equal i");
	}

	extra_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
	if (!ASSERT_EQ(extra_fd, -EBUSY, "program should have been rejected (extra_fd != -EBUSY)"))
		goto cleanup;

	/* correctness: check that prog is still loadable without fd_array */
	extra_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
	if (!ASSERT_GE(extra_fd, 0, "bpf(BPF_PROG_LOAD): expected no error"))
		goto cleanup;

cleanup:
	close(extra_fd);
	close(prog_fd);
	close(map_fd);
}

static void check_bpf_no_lookup(void)
{
	struct bpf_insn insns[] = {
		BPF_LD_MAP_FD(BPF_REG_1, 0),
		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
		BPF_EXIT_INSN(),
	};
	int prog_fd = -1, map_fd;

	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
	if (!ASSERT_GE(map_fd, 0, "map_create"))
		return;

	insns[0].imm = map_fd;

	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
		goto cleanup;

	prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
	if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
		goto cleanup;

	/* correctness: check that prog is still loadable with a normal map */
	close(map_fd);
	map_fd = map_create(BPF_MAP_TYPE_ARRAY, 1);
	insns[0].imm = map_fd;
	prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
	if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
		goto cleanup;

cleanup:
	close(prog_fd);
	close(map_fd);
}

static void check_bpf_side(void)
{
	check_bpf_no_lookup();
}

static void __test_bpf_insn_array(void)
{
	/* Test if offsets are adjusted properly */

	if (test__start_subtest("one2one"))
		check_one_to_one_mapping();

	if (test__start_subtest("simple"))
		check_simple();

	if (test__start_subtest("deletions"))
		check_deletions();

	if (test__start_subtest("deletions-with-functions"))
		check_deletions_with_functions();

	if (test__start_subtest("blindness"))
		check_blindness();

	/* Check all kinds of operations and related restrictions */

	if (test__start_subtest("incorrect-index"))
		check_incorrect_index();

	if (test__start_subtest("load-unfrozen-map"))
		check_load_unfrozen_map();

	if (test__start_subtest("no-map-reuse"))
		check_no_map_reuse();

	if (test__start_subtest("bpf-side-ops"))
		check_bpf_side();
}
#else
static void __test_bpf_insn_array(void)
{
	test__skip();
}
#endif

void test_bpf_insn_array(void)
{
	__test_bpf_insn_array();
}

@@ -7495,6 +7495,71 @@ static struct btf_dedup_test dedup_tests[] = {
|
||||
BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
|
||||
},
|
||||
},
|
||||
{
|
||||
.descr = "dedup: recursive typedef",
|
||||
/*
 * This test simulates a recursive typedef, which in Go is defined as:
 *
 * type Foo func() Foo
 *
 * In BTF terms, this is represented as a TYPEDEF referencing
 * a FUNC_PROTO that returns the same TYPEDEF.
 */
|
||||
.input = {
|
||||
.raw_types = {
|
||||
/*
|
||||
* [1] typedef Foo -> func() Foo
|
||||
* [2] func_proto() -> Foo
|
||||
* [3] typedef Foo -> func() Foo
|
||||
* [4] func_proto() -> Foo
|
||||
*/
|
||||
BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
|
||||
BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
|
||||
BTF_TYPEDEF_ENC(NAME_NTH(1), 4), /* [3] */
|
||||
BTF_FUNC_PROTO_ENC(3, 0), /* [4] */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0Foo"),
|
||||
},
|
||||
.expect = {
|
||||
.raw_types = {
|
||||
BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
|
||||
BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0Foo"),
|
||||
},
|
||||
},
|
||||
{
|
||||
.descr = "dedup: typedef",
|
||||
/*
|
||||
* // CU 1:
|
||||
* typedef int foo;
|
||||
*
|
||||
* // CU 2:
|
||||
* typedef int foo;
|
||||
*/
|
||||
.input = {
|
||||
.raw_types = {
|
||||
/* CU 1 */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
|
||||
BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
|
||||
/* CU 2 */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [3] */
|
||||
BTF_TYPEDEF_ENC(NAME_NTH(1), 3), /* [4] */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0foo"),
|
||||
},
|
||||
.expect = {
|
||||
.raw_types = {
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
|
||||
BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
BTF_STR_SEC("\0foo"),
|
||||
},
|
||||
},
|
||||
{
|
||||
.descr = "dedup: typedef tags",
|
||||
.input = {
|
||||
|
||||
@@ -12,11 +12,45 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
|
||||
vfprintf(ctx, fmt, args);
|
||||
}
|
||||
|
||||
/* Write raw BTF to file, return number of bytes written or negative errno */
|
||||
static ssize_t btf_raw_write(struct btf *btf, char *file)
|
||||
{
|
||||
ssize_t written = 0;
|
||||
const void *data;
|
||||
__u32 size = 0;
|
||||
int fd, ret;
|
||||
|
||||
fd = mkstemp(file);
|
||||
if (!ASSERT_GE(fd, 0, "create_file"))
|
||||
return -errno;
|
||||
|
||||
data = btf__raw_data(btf, &size);
|
||||
if (!ASSERT_OK_PTR(data, "btf__raw_data")) {
|
||||
close(fd);
|
||||
return -EINVAL;
|
||||
}
|
||||
while (written < size) {
|
||||
ret = write(fd, data + written, size - written);
|
||||
if (!ASSERT_GE(ret, 0, "write succeeded")) {
|
||||
close(fd);
|
||||
return -errno;
|
||||
}
|
||||
written += ret;
|
||||
}
|
||||
close(fd);
|
||||
return written;
|
||||
}
|
||||
|
||||
static void __test_btf_split(bool multi)
|
||||
{
|
||||
char multisplit_btf_file[] = "/tmp/test_btf_multisplit.XXXXXX";
|
||||
char split_btf_file[] = "/tmp/test_btf_split.XXXXXX";
|
||||
char base_btf_file[] = "/tmp/test_btf_base.XXXXXX";
|
||||
ssize_t multisplit_btf_sz = 0, split_btf_sz = 0, base_btf_sz = 0;
|
||||
struct btf_dump *d = NULL;
|
||||
const struct btf_type *t;
|
||||
struct btf *btf1, *btf2, *btf3 = NULL;
|
||||
const struct btf_type *t, *ot;
|
||||
struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL;
|
||||
struct btf *btf4 = NULL, *btf5 = NULL, *btf6 = NULL;
|
||||
int str_off, i, err;
|
||||
|
||||
btf1 = btf__new_empty();
|
||||
@@ -123,6 +157,45 @@ static void __test_btf_split(bool multi)
|
||||
" int uf2;\n"
|
||||
"};\n\n", "c_dump");
|
||||
|
||||
/* write base, split BTFs to files and ensure parsing succeeds */
|
||||
base_btf_sz = btf_raw_write(btf1, base_btf_file);
|
||||
if (base_btf_sz < 0)
|
||||
goto cleanup;
|
||||
split_btf_sz = btf_raw_write(btf2, split_btf_file);
|
||||
if (split_btf_sz < 0)
|
||||
goto cleanup;
|
||||
btf4 = btf__parse(base_btf_file, NULL);
|
||||
if (!ASSERT_OK_PTR(btf4, "parse_base"))
|
||||
goto cleanup;
|
||||
btf5 = btf__parse_split(split_btf_file, btf4);
|
||||
if (!ASSERT_OK_PTR(btf5, "parse_split"))
|
||||
goto cleanup;
|
||||
if (multi) {
|
||||
multisplit_btf_sz = btf_raw_write(btf3, multisplit_btf_file);
|
||||
if (multisplit_btf_sz < 0)
|
||||
goto cleanup;
|
||||
btf6 = btf__parse_split(multisplit_btf_file, btf5);
|
||||
if (!ASSERT_OK_PTR(btf6, "parse_multisplit"))
|
||||
goto cleanup;
|
||||
} else {
|
||||
btf6 = btf5;
|
||||
}
|
||||
|
||||
if (!ASSERT_EQ(btf__type_cnt(btf3), btf__type_cnt(btf6), "cmp_type_cnt"))
|
||||
goto cleanup;
|
||||
|
||||
/* compare parsed to original BTF */
|
||||
for (i = 1; i < btf__type_cnt(btf6); i++) {
|
||||
t = btf__type_by_id(btf6, i);
|
||||
if (!ASSERT_OK_PTR(t, "type_in_parsed_btf"))
|
||||
goto cleanup;
|
||||
ot = btf__type_by_id(btf3, i);
|
||||
if (!ASSERT_OK_PTR(ot, "type_in_orig_btf"))
|
||||
goto cleanup;
|
||||
if (!ASSERT_EQ(memcmp(t, ot, sizeof(*ot)), 0, "cmp_parsed_orig_btf"))
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
if (dump_buf_file)
|
||||
fclose(dump_buf_file);
|
||||
@@ -132,6 +205,16 @@ cleanup:
|
||||
btf__free(btf2);
|
||||
if (btf2 != btf3)
|
||||
btf__free(btf3);
|
||||
btf__free(btf4);
|
||||
btf__free(btf5);
|
||||
if (btf5 != btf6)
|
||||
btf__free(btf6);
|
||||
if (base_btf_sz > 0)
|
||||
unlink(base_btf_file);
|
||||
if (split_btf_sz > 0)
|
||||
unlink(split_btf_file);
|
||||
if (multisplit_btf_sz > 0)
|
||||
unlink(multisplit_btf_file);
|
||||
}
|
||||
|
||||
void test_btf_split(void)
|
||||
|
||||
@@ -153,6 +153,26 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel,
|
||||
ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user");
|
||||
}
|
||||
|
||||
static void test_chk_segs_flag(struct test_check_mtu *skel, __u32 mtu)
|
||||
{
|
||||
int err, prog_fd = bpf_program__fd(skel->progs.tc_chk_segs_flag);
|
||||
struct __sk_buff skb = {
|
||||
.gso_size = 10,
|
||||
};
|
||||
LIBBPF_OPTS(bpf_test_run_opts, topts,
|
||||
.data_in = &pkt_v4,
|
||||
.data_size_in = sizeof(pkt_v4),
|
||||
.ctx_in = &skb,
|
||||
.ctx_size_in = sizeof(skb),
|
||||
);
|
||||
|
||||
/* Lower the mtu to test the BPF_MTU_CHK_SEGS */
|
||||
SYS_NOFAIL("ip link set dev lo mtu 10");
|
||||
err = bpf_prog_test_run_opts(prog_fd, &topts);
|
||||
SYS_NOFAIL("ip link set dev lo mtu %u", mtu);
|
||||
ASSERT_OK(err, "test_run");
|
||||
ASSERT_EQ(topts.retval, BPF_OK, "retval");
|
||||
}
|
||||
|
||||
static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
|
||||
{
|
||||
@@ -177,11 +197,12 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
|
||||
test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
|
||||
test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
|
||||
test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
|
||||
test_chk_segs_flag(skel, mtu);
|
||||
cleanup:
|
||||
test_check_mtu__destroy(skel);
|
||||
}
|
||||
|
||||
void serial_test_check_mtu(void)
|
||||
void test_ns_check_mtu(void)
|
||||
{
|
||||
int mtu_lo;
|
||||
|
||||
|
||||
@@ -22,79 +22,37 @@
|
||||
|
||||
static int duration = 0;
|
||||
|
||||
struct addr_port {
|
||||
in_port_t port;
|
||||
union {
|
||||
struct in_addr in_addr;
|
||||
struct in6_addr in6_addr;
|
||||
};
|
||||
};
|
||||
|
||||
struct tuple {
|
||||
int family;
|
||||
struct addr_port src;
|
||||
struct addr_port dst;
|
||||
};
|
||||
|
||||
static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
|
||||
{
|
||||
const struct sockaddr_in6 *in6;
|
||||
const struct sockaddr_in *in;
|
||||
|
||||
switch (sa->sa_family) {
|
||||
case AF_INET:
|
||||
in = (const struct sockaddr_in *)sa;
|
||||
ap->in_addr = in->sin_addr;
|
||||
ap->port = in->sin_port;
|
||||
return true;
|
||||
|
||||
case AF_INET6:
|
||||
in6 = (const struct sockaddr_in6 *)sa;
|
||||
ap->in6_addr = in6->sin6_addr;
|
||||
ap->port = in6->sin6_port;
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
|
||||
int *server, int *conn, struct tuple *tuple)
|
||||
static bool set_up_conn(const struct sockaddr_storage *addr, socklen_t len, int type,
|
||||
int *server, int *conn,
|
||||
struct sockaddr_storage *src,
|
||||
struct sockaddr_storage *dst)
|
||||
{
|
||||
struct sockaddr_storage ss;
|
||||
socklen_t slen = sizeof(ss);
|
||||
struct sockaddr *sa = (struct sockaddr *)&ss;
|
||||
|
||||
*server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL);
|
||||
*server = start_server_addr(type, addr, len, NULL);
|
||||
if (*server < 0)
|
||||
return false;
|
||||
|
||||
if (CHECK_FAIL(getsockname(*server, sa, &slen)))
|
||||
if (CHECK_FAIL(getsockname(*server, (struct sockaddr *)&ss, &slen)))
|
||||
goto close_server;
|
||||
|
||||
*conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL);
|
||||
*conn = connect_to_addr(type, &ss, slen, NULL);
|
||||
if (*conn < 0)
|
||||
goto close_server;
|
||||
|
||||
/* We want to simulate packets arriving at conn, so we have to
|
||||
* swap src and dst.
|
||||
*/
|
||||
slen = sizeof(ss);
|
||||
if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
|
||||
slen = sizeof(*dst);
|
||||
if (CHECK_FAIL(getsockname(*conn, (struct sockaddr *)dst, &slen)))
|
||||
goto close_conn;
|
||||
|
||||
if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
|
||||
slen = sizeof(*src);
|
||||
if (CHECK_FAIL(getpeername(*conn, (struct sockaddr *)src, &slen)))
|
||||
goto close_conn;
|
||||
|
||||
slen = sizeof(ss);
|
||||
if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
|
||||
goto close_conn;
|
||||
|
||||
if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
|
||||
goto close_conn;
|
||||
|
||||
tuple->family = ss.ss_family;
|
||||
return true;
|
||||
|
||||
close_conn:
|
||||
@@ -110,17 +68,16 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
|
||||
{
|
||||
struct sockaddr_in *addr4;
|
||||
struct sockaddr_in6 *addr6;
|
||||
memset(addr, 0, sizeof(*addr));
|
||||
|
||||
switch (family) {
|
||||
case AF_INET:
|
||||
addr4 = (struct sockaddr_in *)addr;
|
||||
memset(addr4, 0, sizeof(*addr4));
|
||||
addr4->sin_family = family;
|
||||
addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
|
||||
return sizeof(*addr4);
|
||||
case AF_INET6:
|
||||
addr6 = (struct sockaddr_in6 *)addr;
|
||||
memset(addr6, 0, sizeof(*addr6));
|
||||
addr6->sin6_family = family;
|
||||
addr6->sin6_addr = in6addr_loopback;
|
||||
return sizeof(*addr6);
|
||||
@@ -242,9 +199,15 @@ static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
|
||||
}
|
||||
|
||||
static size_t build_input(const struct test_cfg *test, void *const buf,
|
||||
const struct tuple *tuple)
|
||||
const struct sockaddr_storage *src,
|
||||
const struct sockaddr_storage *dst)
|
||||
{
|
||||
in_port_t sport = tuple->src.port;
|
||||
struct sockaddr_in6 *src_in6 = (struct sockaddr_in6 *)src;
|
||||
struct sockaddr_in6 *dst_in6 = (struct sockaddr_in6 *)dst;
|
||||
struct sockaddr_in *src_in = (struct sockaddr_in *)src;
|
||||
struct sockaddr_in *dst_in = (struct sockaddr_in *)dst;
|
||||
sa_family_t family = src->ss_family;
|
||||
in_port_t sport, dport;
|
||||
encap_headers_t encap;
|
||||
struct iphdr ip;
|
||||
struct ipv6hdr ipv6;
|
||||
@@ -254,8 +217,11 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
|
||||
uint8_t *p = buf;
|
||||
int proto;
|
||||
|
||||
sport = (family == AF_INET) ? src_in->sin_port : src_in6->sin6_port;
|
||||
dport = (family == AF_INET) ? dst_in->sin_port : dst_in6->sin6_port;
|
||||
|
||||
proto = IPPROTO_IPIP;
|
||||
if (tuple->family == AF_INET6)
|
||||
if (family == AF_INET6)
|
||||
proto = IPPROTO_IPV6;
|
||||
|
||||
encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
|
||||
@@ -270,15 +236,15 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
|
||||
if (test->type == UDP)
|
||||
proto = IPPROTO_UDP;
|
||||
|
||||
switch (tuple->family) {
|
||||
switch (family) {
|
||||
case AF_INET:
|
||||
ip = (struct iphdr){
|
||||
.ihl = 5,
|
||||
.version = 4,
|
||||
.ttl = IPDEFTTL,
|
||||
.protocol = proto,
|
||||
.saddr = tuple->src.in_addr.s_addr,
|
||||
.daddr = tuple->dst.in_addr.s_addr,
|
||||
.saddr = src_in->sin_addr.s_addr,
|
||||
.daddr = dst_in->sin_addr.s_addr,
|
||||
};
|
||||
p = mempcpy(p, &ip, sizeof(ip));
|
||||
break;
|
||||
@@ -287,8 +253,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
|
||||
.version = 6,
|
||||
.hop_limit = IPDEFTTL,
|
||||
.nexthdr = proto,
|
||||
.saddr = tuple->src.in6_addr,
|
||||
.daddr = tuple->dst.in6_addr,
|
||||
.saddr = src_in6->sin6_addr,
|
||||
.daddr = dst_in6->sin6_addr,
|
||||
};
|
||||
p = mempcpy(p, &ipv6, sizeof(ipv6));
|
||||
break;
|
||||
@@ -303,18 +269,16 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
|
||||
case TCP:
|
||||
tcp = (struct tcphdr){
|
||||
.source = sport,
|
||||
.dest = tuple->dst.port,
|
||||
.dest = dport,
|
||||
.syn = (test->flags == SYN),
|
||||
.ack = (test->flags == ACK),
|
||||
};
|
||||
if (test->flags == SYN)
|
||||
tcp.syn = true;
|
||||
if (test->flags == ACK)
|
||||
tcp.ack = true;
|
||||
p = mempcpy(p, &tcp, sizeof(tcp));
|
||||
break;
|
||||
case UDP:
|
||||
udp = (struct udphdr){
|
||||
.source = sport,
|
||||
.dest = tuple->dst.port,
|
||||
.dest = dport,
|
||||
};
|
||||
p = mempcpy(p, &udp, sizeof(udp));
|
||||
break;
|
||||
@@ -339,27 +303,26 @@ static void test_cls_redirect_common(struct bpf_program *prog)
|
||||
LIBBPF_OPTS(bpf_test_run_opts, tattr);
|
||||
int families[] = { AF_INET, AF_INET6 };
|
||||
struct sockaddr_storage ss;
|
||||
struct sockaddr *addr;
|
||||
socklen_t slen;
|
||||
int i, j, err, prog_fd;
|
||||
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
|
||||
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
|
||||
struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
|
||||
struct sockaddr_storage srcs[__NR_KIND][ARRAY_SIZE(families)];
|
||||
struct sockaddr_storage dsts[__NR_KIND][ARRAY_SIZE(families)];
|
||||
|
||||
addr = (struct sockaddr *)&ss;
|
||||
for (i = 0; i < ARRAY_SIZE(families); i++) {
|
||||
slen = prepare_addr(&ss, families[i]);
|
||||
if (CHECK_FAIL(!slen))
|
||||
goto cleanup;
|
||||
|
||||
if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
|
||||
if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_DGRAM,
|
||||
&servers[UDP][i], &conns[UDP][i],
|
||||
&tuples[UDP][i])))
|
||||
&srcs[UDP][i], &dsts[UDP][i])))
|
||||
goto cleanup;
|
||||
|
||||
if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
|
||||
if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_STREAM,
|
||||
&servers[TCP][i], &conns[TCP][i],
|
||||
&tuples[TCP][i])))
|
||||
&srcs[TCP][i], &dsts[TCP][i])))
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
@@ -368,11 +331,12 @@ static void test_cls_redirect_common(struct bpf_program *prog)
|
||||
struct test_cfg *test = &tests[i];
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(families); j++) {
|
||||
struct tuple *tuple = &tuples[test->type][j];
|
||||
struct sockaddr_storage *src = &srcs[test->type][j];
|
||||
struct sockaddr_storage *dst = &dsts[test->type][j];
|
||||
char input[256];
|
||||
char tmp[256];
|
||||
|
||||
test_str(tmp, sizeof(tmp), test, tuple->family);
|
||||
test_str(tmp, sizeof(tmp), test, families[j]);
|
||||
if (!test__start_subtest(tmp))
|
||||
continue;
|
||||
|
||||
@@ -380,7 +344,7 @@ static void test_cls_redirect_common(struct bpf_program *prog)
|
||||
tattr.data_size_out = sizeof(tmp);
|
||||
|
||||
tattr.data_in = input;
|
||||
tattr.data_size_in = build_input(test, input, tuple);
|
||||
tattr.data_size_in = build_input(test, input, src, dst);
|
||||
if (CHECK_FAIL(!tattr.data_size_in))
|
||||
continue;
|
||||
|
||||
|
||||
tools/testing/selftests/bpf/prog_tests/file_reader.c (new file, 117 lines)
@@ -0,0 +1,117 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
|
||||
|
||||
#include <test_progs.h>
|
||||
#include <network_helpers.h>
|
||||
#include "file_reader.skel.h"
|
||||
#include "file_reader_fail.skel.h"
|
||||
#include <dlfcn.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
const char *user_ptr = "hello world";
|
||||
char file_contents[256000];
|
||||
|
||||
void *get_executable_base_addr(void)
|
||||
{
|
||||
Dl_info info;
|
||||
|
||||
if (!dladdr((void *)&get_executable_base_addr, &info)) {
|
||||
fprintf(stderr, "dladdr failed\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return info.dli_fbase;
|
||||
}
|
||||
|
||||
static int initialize_file_contents(void)
|
||||
{
|
||||
int fd, page_sz = sysconf(_SC_PAGESIZE);
|
||||
ssize_t n = 0, cur, off;
|
||||
void *addr;
|
||||
|
||||
fd = open("/proc/self/exe", O_RDONLY);
|
||||
if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n"))
|
||||
return 1;
|
||||
|
||||
do {
|
||||
cur = read(fd, file_contents + n, sizeof(file_contents) - n);
|
||||
if (!ASSERT_GT(cur, 0, "read success"))
|
||||
break;
|
||||
n += cur;
|
||||
} while (n < sizeof(file_contents));
|
||||
|
||||
close(fd);
|
||||
|
||||
if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n"))
|
||||
return 1;
|
||||
|
||||
addr = get_executable_base_addr();
|
||||
if (!ASSERT_NEQ(addr, NULL, "get executable address"))
|
||||
return 1;
|
||||
|
||||
/* page-align base file address */
|
||||
addr = (void *)((unsigned long)addr & ~(page_sz - 1));
|
||||
|
||||
/*
|
||||
* Page out range 0..512K, use 0..256K for positive tests and
|
||||
* 256K..512K for negative tests expecting page faults
|
||||
*/
|
||||
for (off = 0; off < sizeof(file_contents) * 2; off += page_sz) {
|
||||
if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT),
|
||||
"madvise pageout"))
|
||||
return errno;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void run_test(const char *prog_name)
|
||||
{
|
||||
struct file_reader *skel;
|
||||
struct bpf_program *prog;
|
||||
int err, fd;
|
||||
|
||||
err = initialize_file_contents();
|
||||
if (!ASSERT_OK(err, "initialize file contents"))
|
||||
return;
|
||||
|
||||
skel = file_reader__open();
|
||||
if (!ASSERT_OK_PTR(skel, "file_reader__open"))
|
||||
return;
|
||||
|
||||
bpf_object__for_each_program(prog, skel->obj) {
|
||||
bpf_program__set_autoload(prog, strcmp(bpf_program__name(prog), prog_name) == 0);
|
||||
}
|
||||
|
||||
memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents));
|
||||
skel->bss->pid = getpid();
|
||||
|
||||
err = file_reader__load(skel);
|
||||
if (!ASSERT_OK(err, "file_reader__load"))
|
||||
goto cleanup;
|
||||
|
||||
err = file_reader__attach(skel);
|
||||
if (!ASSERT_OK(err, "file_reader__attach"))
|
||||
goto cleanup;
|
||||
|
||||
fd = open("/proc/self/exe", O_RDONLY);
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
|
||||
ASSERT_EQ(skel->bss->err, 0, "err");
|
||||
ASSERT_EQ(skel->bss->run_success, 1, "run_success");
|
||||
cleanup:
|
||||
file_reader__destroy(skel);
|
||||
}
|
||||
|
||||
void test_file_reader(void)
|
||||
{
|
||||
if (test__start_subtest("on_open_expect_fault"))
|
||||
run_test("on_open_expect_fault");
|
||||
|
||||
if (test__start_subtest("on_open_validate_file_read"))
|
||||
run_test("on_open_validate_file_read");
|
||||
|
||||
if (test__start_subtest("negative"))
|
||||
RUN_TESTS(file_reader_fail);
|
||||
}
|
||||
@@ -15,17 +15,17 @@ struct htab_update_ctx {
|
||||
static void test_reenter_update(void)
|
||||
{
|
||||
struct htab_update *skel;
|
||||
unsigned int key, value;
|
||||
void *value = NULL;
|
||||
unsigned int key, value_size;
|
||||
int err;
|
||||
|
||||
skel = htab_update__open();
|
||||
if (!ASSERT_OK_PTR(skel, "htab_update__open"))
|
||||
return;
|
||||
|
||||
/* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */
|
||||
bpf_program__set_autoload(skel->progs.lookup_elem_raw, true);
|
||||
bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true);
|
||||
err = htab_update__load(skel);
|
||||
if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err)
|
||||
if (!ASSERT_TRUE(!err, "htab_update__load") || err)
|
||||
goto out;
|
||||
|
||||
skel->bss->pid = getpid();
|
||||
@@ -33,14 +33,33 @@ static void test_reenter_update(void)
|
||||
if (!ASSERT_OK(err, "htab_update__attach"))
|
||||
goto out;
|
||||
|
||||
/* Will trigger the reentrancy of bpf_map_update_elem() */
|
||||
value_size = bpf_map__value_size(skel->maps.htab);
|
||||
|
||||
value = calloc(1, value_size);
|
||||
if (!ASSERT_OK_PTR(value, "calloc value"))
|
||||
goto out;
|
||||
/*
|
||||
* First update: plain insert. This should NOT trigger the re-entrancy
|
||||
* path, because there is no old element to free yet.
|
||||
*/
|
||||
key = 0;
|
||||
value = 0;
|
||||
err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0);
|
||||
if (!ASSERT_OK(err, "add element"))
|
||||
err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
|
||||
if (!ASSERT_OK(err, "first update (insert)"))
|
||||
goto out;
|
||||
|
||||
ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy");
|
||||
/*
|
||||
* Second update: replace existing element with same key and trigger
|
||||
* the reentrancy of bpf_map_update_elem().
|
||||
* check_and_free_fields() calls bpf_obj_free_fields() on the old
|
||||
* value, which is where fentry program runs and performs a nested
|
||||
* bpf_map_update_elem(), triggering -EDEADLK.
|
||||
*/
|
||||
memset(value, 0, value_size);
|
||||
err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
|
||||
if (!ASSERT_OK(err, "second update (replace)"))
|
||||
goto out;
|
||||
|
||||
ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy");
|
||||
out:
|
||||
htab_update__destroy(skel);
|
||||
}
|
||||
|
||||
@@ -57,7 +57,8 @@ static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel)
|
||||
if (!ASSERT_OK(ret, "kmem_cache_lookup"))
|
||||
break;
|
||||
|
||||
ASSERT_STREQ(r.name, name, "kmem_cache_name");
|
||||
ASSERT_STRNEQ(r.name, name, sizeof(r.name) - 1,
|
||||
"kmem_cache_name");
|
||||
ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize");
|
||||
|
||||
seen++;
|
||||
|
||||
@@ -15,6 +15,10 @@ static void check_good_sample(struct test_perf_branches *skel)
|
||||
int pbe_size = sizeof(struct perf_branch_entry);
|
||||
int duration = 0;
|
||||
|
||||
if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
|
||||
"checked sample validity before prog run"))
|
||||
return;
|
||||
|
||||
if (CHECK(!skel->bss->valid, "output not valid",
|
||||
"no valid sample from prog"))
|
||||
return;
|
||||
@@ -45,6 +49,10 @@ static void check_bad_sample(struct test_perf_branches *skel)
|
||||
int written_stack = skel->bss->written_stack_out;
|
||||
int duration = 0;
|
||||
|
||||
if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
|
||||
"checked sample validity before prog run"))
|
||||
return;
|
||||
|
||||
if (CHECK(!skel->bss->valid, "output not valid",
|
||||
"no valid sample from prog"))
|
||||
return;
|
||||
@@ -83,8 +91,12 @@ static void test_perf_branches_common(int perf_fd,
|
||||
err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
|
||||
if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
|
||||
goto out_destroy;
|
||||
/* spin the loop for a while (random high number) */
|
||||
for (i = 0; i < 1000000; ++i)
|
||||
|
||||
/* Spin the loop for a while by using a high iteration count, and by
|
||||
* checking whether the specific run count marker has been explicitly
|
||||
* incremented at least once by the backing perf_event BPF program.
|
||||
*/
|
||||
for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i)
|
||||
++j;
|
||||
|
||||
test_perf_branches__detach(skel);
|
||||
@@ -116,11 +128,11 @@ static void test_perf_branches_hw(void)
|
||||
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
|
||||
|
||||
/*
|
||||
* Some setups don't support branch records (virtual machines, !x86),
|
||||
* so skip test in this case.
|
||||
* Some setups don't support LBR (virtual machines, !x86, AMD Milan Zen
|
||||
* 3 which only supports BRS), so skip test in this case.
|
||||
*/
|
||||
if (pfd < 0) {
|
||||
if (errno == ENOENT || errno == EOPNOTSUPP) {
|
||||
if (errno == ENOENT || errno == EOPNOTSUPP || errno == EINVAL) {
|
||||
printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
|
||||
__func__);
|
||||
test__skip();
|
||||
|
||||
@@ -28,6 +28,7 @@ static void test_success(void)
|
||||
bpf_program__set_autoload(skel->progs.two_regions, true);
|
||||
bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
|
||||
bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
|
||||
bpf_program__set_autoload(skel->progs.nested_rcu_region, true);
|
||||
bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
|
||||
bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
|
||||
bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
|
||||
@@ -78,7 +79,8 @@ static const char * const inproper_region_tests[] = {
|
||||
"non_sleepable_rcu_mismatch",
|
||||
"inproper_sleepable_helper",
|
||||
"inproper_sleepable_kfunc",
|
||||
"nested_rcu_region",
|
||||
"nested_rcu_region_unbalanced_1",
|
||||
"nested_rcu_region_unbalanced_2",
|
||||
"rcu_read_lock_global_subprog_lock",
|
||||
"rcu_read_lock_global_subprog_unlock",
|
||||
"rcu_read_lock_sleepable_helper_global_subprog",
|
||||
|
||||
@@ -44,3 +44,59 @@ void test_refcounted_kptr_wrong_owner(void)
|
||||
ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
|
||||
refcounted_kptr__destroy(skel);
|
||||
}
|
||||
|
||||
void test_percpu_hash_refcounted_kptr_refcount_leak(void)
|
||||
{
|
||||
struct refcounted_kptr *skel;
|
||||
int cpu_nr, fd, err, key = 0;
|
||||
struct bpf_map *map;
|
||||
size_t values_sz;
|
||||
u64 *values;
|
||||
LIBBPF_OPTS(bpf_test_run_opts, opts,
|
||||
.data_in = &pkt_v4,
|
||||
.data_size_in = sizeof(pkt_v4),
|
||||
.repeat = 1,
|
||||
);
|
||||
|
||||
cpu_nr = libbpf_num_possible_cpus();
|
||||
if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
|
||||
return;
|
||||
|
||||
values = calloc(cpu_nr, sizeof(u64));
|
||||
if (!ASSERT_OK_PTR(values, "calloc values"))
|
||||
return;
|
||||
|
||||
skel = refcounted_kptr__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
|
||||
free(values);
|
||||
return;
|
||||
}
|
||||
|
||||
values_sz = cpu_nr * sizeof(u64);
|
||||
memset(values, 0, values_sz);
|
||||
|
||||
map = skel->maps.percpu_hash;
|
||||
err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
|
||||
if (!ASSERT_OK(err, "bpf_map__update_elem"))
|
||||
goto out;
|
||||
|
||||
fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
|
||||
err = bpf_prog_test_run_opts(fd, &opts);
|
||||
if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
|
||||
goto out;
|
||||
if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
|
||||
goto out;
|
||||
|
||||
err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
|
||||
if (!ASSERT_OK(err, "bpf_map__update_elem"))
|
||||
goto out;
|
||||
|
||||
fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
|
||||
err = bpf_prog_test_run_opts(fd, &opts);
|
||||
ASSERT_OK(err, "bpf_prog_test_run_opts");
|
||||
ASSERT_EQ(opts.retval, 1, "opts.retval");
|
||||
|
||||
out:
|
||||
refcounted_kptr__destroy(skel);
|
||||
free(values);
|
||||
}
|
||||
|
||||
@@ -110,8 +110,8 @@ void serial_test_res_spin_lock_stress(void)
|
||||
ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA");
|
||||
sleep(5);
|
||||
unload_module("bpf_test_rqspinlock", false);
|
||||
|
||||
ASSERT_OK(load_module_params("bpf_test_rqspinlock.ko", "test_ab=1", false), "load module ABBA");
|
||||
sleep(5);
|
||||
unload_module("bpf_test_rqspinlock", false);
|
||||
/*
|
||||
* Insert bpf_test_rqspinlock.ko manually with test_mode=[1|2] to test
|
||||
* other cases (ABBA, ABBCCA).
|
||||
*/
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "test_ringbuf_n.lskel.h"
|
||||
#include "test_ringbuf_map_key.lskel.h"
|
||||
#include "test_ringbuf_write.lskel.h"
|
||||
#include "test_ringbuf_overwrite.lskel.h"
|
||||
|
||||
#define EDONE 7777
|
||||
|
||||
@@ -497,6 +498,68 @@ cleanup:
|
||||
test_ringbuf_map_key_lskel__destroy(skel_map_key);
|
||||
}
|
||||
|
||||
static void ringbuf_overwrite_mode_subtest(void)
|
||||
{
|
||||
unsigned long size, len1, len2, len3, len4, len5;
|
||||
unsigned long expect_avail_data, expect_prod_pos, expect_over_pos;
|
||||
struct test_ringbuf_overwrite_lskel *skel;
|
||||
int page_size = getpagesize();
|
||||
int err;
|
||||
|
||||
skel = test_ringbuf_overwrite_lskel__open();
|
||||
if (!ASSERT_OK_PTR(skel, "skel_open"))
|
||||
return;
|
||||
|
||||
size = page_size;
|
||||
len1 = page_size / 2;
|
||||
len2 = page_size / 4;
|
||||
len3 = size - len1 - len2 - BPF_RINGBUF_HDR_SZ * 3;
|
||||
len4 = len3 - 8;
|
||||
len5 = len3; /* retry with len3 */
|
||||
|
||||
skel->maps.ringbuf.max_entries = size;
|
||||
skel->rodata->LEN1 = len1;
|
||||
skel->rodata->LEN2 = len2;
|
||||
skel->rodata->LEN3 = len3;
|
||||
skel->rodata->LEN4 = len4;
|
||||
skel->rodata->LEN5 = len5;
|
||||
|
||||
skel->bss->pid = getpid();
|
||||
|
||||
err = test_ringbuf_overwrite_lskel__load(skel);
|
||||
if (!ASSERT_OK(err, "skel_load"))
|
||||
goto cleanup;
|
||||
|
||||
err = test_ringbuf_overwrite_lskel__attach(skel);
|
||||
if (!ASSERT_OK(err, "skel_attach"))
|
||||
goto cleanup;
|
||||
|
||||
syscall(__NR_getpgid);
|
||||
|
||||
ASSERT_EQ(skel->bss->reserve1_fail, 0, "reserve 1");
|
||||
ASSERT_EQ(skel->bss->reserve2_fail, 0, "reserve 2");
|
||||
ASSERT_EQ(skel->bss->reserve3_fail, 1, "reserve 3");
|
||||
ASSERT_EQ(skel->bss->reserve4_fail, 0, "reserve 4");
|
||||
ASSERT_EQ(skel->bss->reserve5_fail, 0, "reserve 5");
|
||||
|
||||
ASSERT_EQ(skel->bss->ring_size, size, "check_ring_size");
|
||||
|
||||
expect_avail_data = len2 + len4 + len5 + 3 * BPF_RINGBUF_HDR_SZ;
|
||||
ASSERT_EQ(skel->bss->avail_data, expect_avail_data, "check_avail_size");
|
||||
|
||||
ASSERT_EQ(skel->bss->cons_pos, 0, "check_cons_pos");
|
||||
|
||||
expect_prod_pos = len1 + len2 + len4 + len5 + 4 * BPF_RINGBUF_HDR_SZ;
|
||||
ASSERT_EQ(skel->bss->prod_pos, expect_prod_pos, "check_prod_pos");
|
||||
|
||||
expect_over_pos = len1 + BPF_RINGBUF_HDR_SZ;
|
||||
ASSERT_EQ(skel->bss->over_pos, expect_over_pos, "check_over_pos");
|
||||
|
||||
test_ringbuf_overwrite_lskel__detach(skel);
|
||||
cleanup:
|
||||
test_ringbuf_overwrite_lskel__destroy(skel);
|
||||
}
|
||||
|
||||
void test_ringbuf(void)
|
||||
{
|
||||
if (test__start_subtest("ringbuf"))
|
||||
@@ -507,4 +570,6 @@ void test_ringbuf(void)
|
||||
ringbuf_map_key_subtest();
|
||||
if (test__start_subtest("ringbuf_write"))
|
||||
ringbuf_write_subtest();
|
||||
if (test__start_subtest("ringbuf_overwrite_mode"))
|
||||
ringbuf_overwrite_mode_subtest();
|
||||
}
|
||||
|
||||
@@ -41,11 +41,7 @@ static struct bpf_object *obj;
|
||||
static __u32 index_zero;
|
||||
static int epfd;
|
||||
|
||||
static union sa46 {
|
||||
struct sockaddr_in6 v6;
|
||||
struct sockaddr_in v4;
|
||||
sa_family_t family;
|
||||
} srv_sa;
|
||||
static struct sockaddr_storage srv_sa;
|
||||
|
||||
#define RET_IF(condition, tag, format...) ({ \
|
||||
if (CHECK_FAIL(condition)) { \
|
||||
@@ -135,24 +131,24 @@ static int prepare_bpf_obj(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
|
||||
static void ss_init_loopback(struct sockaddr_storage *sa, sa_family_t family)
|
||||
{
|
||||
memset(sa, 0, sizeof(*sa));
|
||||
sa->family = family;
|
||||
if (sa->family == AF_INET6)
|
||||
sa->v6.sin6_addr = in6addr_loopback;
|
||||
sa->ss_family = family;
|
||||
if (sa->ss_family == AF_INET6)
|
||||
((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_loopback;
|
||||
else
|
||||
sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
|
||||
((struct sockaddr_in *)sa)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
|
||||
}
|
||||
|
||||
static void sa46_init_inany(union sa46 *sa, sa_family_t family)
|
||||
static void ss_init_inany(struct sockaddr_storage *sa, sa_family_t family)
|
||||
{
|
||||
memset(sa, 0, sizeof(*sa));
|
||||
sa->family = family;
|
||||
if (sa->family == AF_INET6)
|
||||
sa->v6.sin6_addr = in6addr_any;
|
||||
sa->ss_family = family;
|
||||
if (sa->ss_family == AF_INET6)
|
||||
((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_any;
|
||||
else
|
||||
sa->v4.sin_addr.s_addr = INADDR_ANY;
|
||||
((struct sockaddr_in *)sa)->sin_addr.s_addr = INADDR_ANY;
|
||||
}
|
||||
|
||||
static int read_int_sysctl(const char *sysctl)
|
||||
@@ -228,7 +224,7 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
|
||||
int cli_fd)
|
||||
{
|
||||
struct data_check expected = {}, result;
|
||||
union sa46 cli_sa;
|
||||
struct sockaddr_storage cli_sa;
|
||||
socklen_t addrlen;
|
||||
int err;
|
||||
|
||||
@@ -251,26 +247,32 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
|
||||
}
|
||||
|
||||
if (family == AF_INET6) {
|
||||
expected.eth_protocol = htons(ETH_P_IPV6);
|
||||
expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
|
||||
!srv_sa.v6.sin6_addr.s6_addr32[2] &&
|
||||
!srv_sa.v6.sin6_addr.s6_addr32[1] &&
|
||||
!srv_sa.v6.sin6_addr.s6_addr32[0];
|
||||
struct sockaddr_in6 *srv_v6 = (struct sockaddr_in6 *)&srv_sa;
|
||||
struct sockaddr_in6 *cli_v6 = (struct sockaddr_in6 *)&cli_sa;
|
||||
|
||||
memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
|
||||
sizeof(cli_sa.v6.sin6_addr));
|
||||
expected.eth_protocol = htons(ETH_P_IPV6);
|
||||
expected.bind_inany = !srv_v6->sin6_addr.s6_addr32[3] &&
|
||||
!srv_v6->sin6_addr.s6_addr32[2] &&
|
||||
!srv_v6->sin6_addr.s6_addr32[1] &&
|
||||
!srv_v6->sin6_addr.s6_addr32[0];
|
||||
|
||||
memcpy(&expected.skb_addrs[0], cli_v6->sin6_addr.s6_addr32,
|
||||
sizeof(cli_v6->sin6_addr));
|
||||
memcpy(&expected.skb_addrs[4], &in6addr_loopback,
|
||||
sizeof(in6addr_loopback));
|
||||
expected.skb_ports[0] = cli_sa.v6.sin6_port;
|
||||
expected.skb_ports[1] = srv_sa.v6.sin6_port;
|
||||
expected.skb_ports[0] = cli_v6->sin6_port;
|
||||
expected.skb_ports[1] = srv_v6->sin6_port;
|
||||
} else {
|
||||
expected.eth_protocol = htons(ETH_P_IP);
|
||||
expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
|
||||
struct sockaddr_in *srv_v4 = (struct sockaddr_in *)&srv_sa;
|
||||
struct sockaddr_in *cli_v4 = (struct sockaddr_in *)&cli_sa;
|
||||
|
||||
expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
|
||||
expected.eth_protocol = htons(ETH_P_IP);
|
||||
expected.bind_inany = !srv_v4->sin_addr.s_addr;
|
||||
|
||||
expected.skb_addrs[0] = cli_v4->sin_addr.s_addr;
|
||||
expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
|
||||
expected.skb_ports[0] = cli_sa.v4.sin_port;
|
||||
expected.skb_ports[1] = srv_sa.v4.sin_port;
|
||||
expected.skb_ports[0] = cli_v4->sin_port;
|
||||
expected.skb_ports[1] = srv_v4->sin_port;
|
||||
}
|
||||
|
||||
if (memcmp(&result, &expected, offsetof(struct data_check,
|
||||
@@ -364,16 +366,15 @@ static void check_results(void)
|
||||
static int send_data(int type, sa_family_t family, void *data, size_t len,
|
||||
enum result expected)
|
||||
{
|
||||
union sa46 cli_sa;
|
||||
struct sockaddr_storage cli_sa;
|
||||
int fd, err;
|
||||
|
||||
fd = socket(family, type, 0);
|
||||
RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
|
||||
|
||||
sa46_init_loopback(&cli_sa, family);
|
||||
ss_init_loopback(&cli_sa, family);
|
||||
err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
|
||||
RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
|
||||
|
||||
err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
|
||||
sizeof(srv_sa));
|
||||
RET_ERR(err != len && expected >= PASS,
|
||||
@@ -589,9 +590,9 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
|
||||
socklen_t addrlen;
|
||||
|
||||
if (inany)
|
||||
sa46_init_inany(&srv_sa, family);
|
||||
ss_init_inany(&srv_sa, family);
|
||||
else
|
||||
sa46_init_loopback(&srv_sa, family);
|
||||
ss_init_loopback(&srv_sa, family);
|
||||
addrlen = sizeof(srv_sa);
|
||||
|
||||
/*
|
||||
|
||||
@@ -206,6 +206,11 @@ destroy_skel:
|
||||
skel_open_load_failure:
|
||||
close(pipe_c2p[0]);
|
||||
close(pipe_p2c[1]);
|
||||
/*
|
||||
* Child is either about to exit cleanly or stuck in case of errors.
|
||||
* Nudge it to exit.
|
||||
*/
|
||||
kill(pid, SIGKILL);
|
||||
wait(NULL);
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,9 @@ static const char * const test_cases[] = {
|
||||
"strcspn_str",
|
||||
"strcspn_reject",
|
||||
"strstr",
|
||||
"strcasestr",
|
||||
"strnstr",
|
||||
"strncasestr",
|
||||
};
|
||||
|
||||
void run_too_long_tests(void)
|
||||
|
||||
@@ -139,7 +139,7 @@ static void test_lsm_tailcall(void)
|
||||
if (CHECK_FAIL(!err))
|
||||
goto close_prog;
|
||||
|
||||
prog_fd = bpf_program__fd(skel->progs.lsm_file_alloc_security_prog);
|
||||
prog_fd = bpf_program__fd(skel->progs.lsm_kernfs_init_security_prog);
|
||||
if (CHECK_FAIL(prog_fd < 0))
|
||||
goto close_prog;
|
||||
|
||||
|
||||
tools/testing/selftests/bpf/prog_tests/test_tc_edt.c (new file, 145 lines)
@@ -0,0 +1,145 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
|
||||
|
||||
/*
 * BPF-based flow shaping
 *
 * The test brings up a veth pair in two isolated namespaces, attaches a flow
 * shaping program to it, and ensures that the measured transfer rate matches
 * the rate set in the BPF shaper.
 */
|
||||
|
||||
#include <asm-generic/socket.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/socket.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#include <pthread.h>
|
||||
#include "test_progs.h"
|
||||
#include "network_helpers.h"
|
||||
#include "test_tc_edt.skel.h"
|
||||
|
||||
#define SERVER_NS "tc-edt-server-ns"
|
||||
#define CLIENT_NS "tc-edt-client-ns"
|
||||
#define IP4_ADDR_VETH1 "192.168.1.1"
|
||||
#define IP4_ADDR_VETH2 "192.168.1.2"
|
||||
#define IP4_ADDR_VETH2_HEX 0xC0A80102
|
||||
|
||||
#define TIMEOUT_MS 2000
|
||||
#define TEST_PORT 9000
|
||||
#define TARGET_RATE_MBPS 5.0
|
||||
#define TX_BYTES_COUNT (1 * 1000 * 1000)
|
||||
#define RATE_ERROR_PERCENT 2.0
|
||||
|
||||
struct connection {
|
||||
int server_listen_fd;
|
||||
int server_conn_fd;
|
||||
int client_conn_fd;
|
||||
};
|
||||
|
||||
static int setup(struct test_tc_edt *skel)
|
||||
{
|
||||
struct nstoken *nstoken_client, *nstoken_server;
|
||||
int ret;
|
||||
|
||||
if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns"))
|
||||
goto fail;
|
||||
if (!ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
|
||||
goto fail_delete_client_ns;
|
||||
|
||||
nstoken_client = open_netns(CLIENT_NS);
|
||||
if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
|
||||
goto fail_delete_server_ns;
|
||||
SYS(fail_close_client_ns, "ip link add veth1 type veth peer name %s",
|
||||
"veth2 netns " SERVER_NS);
|
||||
SYS(fail_close_client_ns, "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
|
||||
SYS(fail_close_client_ns, "ip link set veth1 up");
|
||||
|
||||
nstoken_server = open_netns(SERVER_NS);
|
||||
if (!ASSERT_OK_PTR(nstoken_server, "enter server ns"))
|
||||
goto fail_close_client_ns;
|
||||
SYS(fail_close_server_ns, "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
|
||||
SYS(fail_close_server_ns, "ip link set veth2 up");
|
||||
SYS(fail_close_server_ns, "tc qdisc add dev veth2 root fq");
|
||||
ret = tc_prog_attach("veth2", -1, bpf_program__fd(skel->progs.tc_prog));
|
||||
if (!ASSERT_OK(ret, "attach bpf prog"))
|
||||
goto fail_close_server_ns;
|
||||
skel->bss->target_rate = TARGET_RATE_MBPS * 1000 * 1000;
|
||||
close_netns(nstoken_server);
|
||||
close_netns(nstoken_client);
|
||||
|
||||
return 0;
|
||||
|
||||
fail_close_server_ns:
|
||||
close_netns(nstoken_server);
|
||||
fail_close_client_ns:
|
||||
close_netns(nstoken_client);
|
||||
fail_delete_server_ns:
|
||||
remove_netns(SERVER_NS);
|
||||
fail_delete_client_ns:
|
||||
remove_netns(CLIENT_NS);
|
||||
fail:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void cleanup(void)
|
||||
{
|
||||
remove_netns(CLIENT_NS);
|
||||
remove_netns(SERVER_NS);
|
||||
}
|
||||
|
||||
static void run_test(void)
|
||||
{
|
||||
int server_fd, client_fd, err;
|
||||
double rate_mbps, rate_error;
|
||||
struct nstoken *nstoken;
|
||||
__u64 ts_start, ts_end;
|
||||
|
||||
nstoken = open_netns(SERVER_NS);
|
||||
if (!ASSERT_OK_PTR(nstoken, "open server ns"))
|
||||
return;
|
||||
server_fd = start_server(AF_INET, SOCK_STREAM, IP4_ADDR_VETH2,
|
||||
TEST_PORT, TIMEOUT_MS);
|
||||
if (!ASSERT_OK_FD(server_fd, "start server"))
|
||||
return;
|
||||
|
||||
close_netns(nstoken);
|
||||
nstoken = open_netns(CLIENT_NS);
|
||||
if (!ASSERT_OK_PTR(nstoken, "open client ns"))
|
||||
return;
|
||||
client_fd = connect_to_fd(server_fd, 0);
|
||||
if (!ASSERT_OK_FD(client_fd, "connect client"))
|
||||
return;
|
||||
|
||||
ts_start = get_time_ns();
|
||||
err = send_recv_data(server_fd, client_fd, TX_BYTES_COUNT);
|
||||
ts_end = get_time_ns();
|
||||
close_netns(nstoken);
|
||||
ASSERT_OK(err, "send_recv_data");
|
||||
|
||||
rate_mbps = TX_BYTES_COUNT / ((ts_end - ts_start) / 1000.0);
|
||||
rate_error =
|
||||
fabs((rate_mbps - TARGET_RATE_MBPS) * 100.0 / TARGET_RATE_MBPS);
|
||||
|
||||
ASSERT_LE(rate_error, RATE_ERROR_PERCENT,
|
||||
"rate error is lower than threshold");
|
||||
}
|
||||
|
||||
void test_tc_edt(void)
|
||||
{
|
||||
struct test_tc_edt *skel;
|
||||
|
||||
skel = test_tc_edt__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "skel open and load"))
|
||||
return;
|
||||
|
||||
if (!ASSERT_OK(setup(skel), "global setup"))
|
||||
return;
|
||||
|
||||
run_test();
|
||||
|
||||
cleanup();
|
||||
test_tc_edt__destroy(skel);
|