perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR

Arch-PEBS introduces a new MSR, IA32_PEBS_BASE, which holds the physical
address of the arch-PEBS buffer. Allocate the per-CPU arch-PEBS buffer and
initialize the IA32_PEBS_BASE MSR with that buffer's physical address.

Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-10-dapeng1.mi@linux.intel.com
Author:     Dapeng Mi <dapeng1.mi@linux.intel.com>
AuthorDate: 2025-10-29 18:21:33 +08:00
Committer:  Peter Zijlstra
Commit:     2721e8da2d (parent: d21954c8a0)

4 changed files with 92 additions and 15 deletions
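
Editor's note: the value written to IA32_PEBS_BASE packs two things, per
init_arch_pebs_on_cpu() in the diff below: the 4KB-aligned physical address of
the per-CPU buffer, and a size exponent in the low bits (the patch's comment:
Buffer Size = 4KB * 2^SIZE; with PEBS_BUFFER_SHIFT = 4 that is a 64KB buffer).
A minimal standalone sketch of that encoding follows; it is illustrative
user-space C, not part of the patch, and the physical address used is
hypothetical.

/*
 * Sketch: how the IA32_PEBS_BASE value is composed, following
 * init_arch_pebs_on_cpu() below. Page allocation guarantees 4KB
 * alignment, so the low bits are free to carry the size exponent.
 */
#include <stdint.h>
#include <stdio.h>

#define PEBS_BUFFER_SHIFT 4	/* from the patch: 4KB << 4 = 64KB buffer */

int main(void)
{
	/* hypothetical 4KB-aligned physical address of the PEBS buffer */
	uint64_t pebs_buf_phys = 0x12345000ULL;
	uint64_t msr_val = pebs_buf_phys | PEBS_BUFFER_SHIFT;

	/* the kernel splits the 64-bit value into lo/hi for wrmsr_on_cpu() */
	uint32_t lo = (uint32_t)msr_val;
	uint32_t hi = (uint32_t)(msr_val >> 32);

	printf("IA32_PEBS_BASE = %#llx (lo=%#x, hi=%#x)\n",
	       (unsigned long long)msr_val, (unsigned)lo, (unsigned)hi);
	return 0;
}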

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c

@@ -5227,7 +5227,13 @@ err:
 
 static int intel_pmu_cpu_prepare(int cpu)
 {
-	return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
+	int ret;
+
+	ret = intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
+	if (ret)
+		return ret;
+
+	return alloc_arch_pebs_buf_on_cpu(cpu);
 }
 
 static void flip_smm_bit(void *data)
@@ -5458,6 +5464,7 @@ static void intel_pmu_cpu_starting(int cpu)
 		return;
 
 	init_debug_store_on_cpu(cpu);
+	init_arch_pebs_on_cpu(cpu);
 	/*
 	 * Deal with CPUs that don't clear their LBRs on power-up, and that may
 	 * even boot with LBRs enabled.
@@ -5555,6 +5562,7 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc)
 static void intel_pmu_cpu_dying(int cpu)
 {
 	fini_debug_store_on_cpu(cpu);
+	fini_arch_pebs_on_cpu(cpu);
 }
 
 void intel_cpuc_finish(struct cpu_hw_events *cpuc)
@@ -5575,6 +5583,7 @@ static void intel_pmu_cpu_dead(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
+	release_arch_pebs_buf_on_cpu(cpu);
 	intel_cpuc_finish(cpuc);
 
 	if (is_hybrid() && cpuc->pmu)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c

@@ -625,13 +625,18 @@ static int alloc_pebs_buffer(int cpu)
 	int max, node = cpu_to_node(cpu);
 	void *buffer, *insn_buff, *cea;
 
-	if (!x86_pmu.ds_pebs)
+	if (!intel_pmu_has_pebs())
 		return 0;
 
 	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
+	if (x86_pmu.arch_pebs) {
+		hwev->pebs_vaddr = buffer;
+		return 0;
+	}
+
 	/*
 	 * HSW+ already provides us the eventing ip; no need to allocate this
 	 * buffer then.
@@ -644,7 +649,7 @@ static int alloc_pebs_buffer(int cpu)
 		}
 		per_cpu(insn_buffer, cpu) = insn_buff;
 	}
-	hwev->ds_pebs_vaddr = buffer;
+	hwev->pebs_vaddr = buffer;
 	/* Update the cpu entry area mapping */
 	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
 	ds->pebs_buffer_base = (unsigned long) cea;
@@ -660,17 +665,20 @@ static void release_pebs_buffer(int cpu)
 	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
 	void *cea;
 
-	if (!x86_pmu.ds_pebs)
+	if (!intel_pmu_has_pebs())
 		return;
 
-	kfree(per_cpu(insn_buffer, cpu));
-	per_cpu(insn_buffer, cpu) = NULL;
+	if (x86_pmu.ds_pebs) {
+		kfree(per_cpu(insn_buffer, cpu));
+		per_cpu(insn_buffer, cpu) = NULL;
 
-	/* Clear the fixmap */
-	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
-	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
-	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
-	hwev->ds_pebs_vaddr = NULL;
+		/* Clear the fixmap */
+		cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+		ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
+	}
+
+	dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
@@ -823,6 +831,56 @@ void reserve_ds_buffers(void)
 	}
 }
 
+inline int alloc_arch_pebs_buf_on_cpu(int cpu)
+{
+	if (!x86_pmu.arch_pebs)
+		return 0;
+
+	return alloc_pebs_buffer(cpu);
+}
+
+inline void release_arch_pebs_buf_on_cpu(int cpu)
+{
+	if (!x86_pmu.arch_pebs)
+		return;
+
+	release_pebs_buffer(cpu);
+}
+
+void init_arch_pebs_on_cpu(int cpu)
+{
+	struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+	u64 arch_pebs_base;
+
+	if (!x86_pmu.arch_pebs)
+		return;
+
+	if (!cpuc->pebs_vaddr) {
+		WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu);
+		x86_pmu.pebs_active = 0;
+		return;
+	}
+
+	/*
+	 * 4KB-aligned pointer of the output buffer
+	 * (__alloc_pages_node() return page aligned address)
+	 * Buffer Size = 4KB * 2^SIZE
+	 * contiguous physical buffer (__alloc_pages_node() with order)
+	 */
+	arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT;
+	wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base,
+		     (u32)(arch_pebs_base >> 32));
+	x86_pmu.pebs_active = 1;
+}
+
+inline void fini_arch_pebs_on_cpu(int cpu)
+{
+	if (!x86_pmu.arch_pebs)
+		return;
+
+	wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0);
+}
+
 /*
  * BTS
  */
@@ -2883,8 +2941,8 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
 		return;
 	}
 
-	base = cpuc->ds_pebs_vaddr;
-	top = (void *)((u64)cpuc->ds_pebs_vaddr +
+	base = cpuc->pebs_vaddr;
+	top = (void *)((u64)cpuc->pebs_vaddr +
 		       (index.wr << ARCH_PEBS_INDEX_WR_SHIFT));
 
 	index.wr = 0;

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h

@@ -283,8 +283,9 @@ struct cpu_hw_events {
 	 * Intel DebugStore bits
 	 */
 	struct debug_store	*ds;
-	void			*ds_pebs_vaddr;
 	void			*ds_bts_vaddr;
+	/* DS based PEBS or arch-PEBS buffer address */
+	void			*pebs_vaddr;
 	u64			pebs_enabled;
 	int			n_pebs;
 	int			n_large_pebs;
@@ -1617,6 +1618,14 @@ extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);
 
 int intel_pmu_init(void);
 
+int alloc_arch_pebs_buf_on_cpu(int cpu);
+
+void release_arch_pebs_buf_on_cpu(int cpu);
+
+void init_arch_pebs_on_cpu(int cpu);
+
+void fini_arch_pebs_on_cpu(int cpu);
+
 void init_debug_store_on_cpu(int cpu);
 
 void fini_debug_store_on_cpu(int cpu);

diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h

@@ -4,7 +4,8 @@
 #include <linux/percpu-defs.h>
 
 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
+#define PEBS_BUFFER_SHIFT	4
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << PEBS_BUFFER_SHIFT)
 
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4	8