perf/x86/intel/ds: Factor out PEBS group processing code to functions

Adaptive PEBS and arch-PEBS share a lot of the same code for processing
PEBS groups such as the basic, GPR and meminfo groups. Extract this shared
code into generic functions to avoid duplication.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-8-dapeng1.mi@linux.intel.com
This commit is contained in:
Dapeng Mi
2025-10-29 18:21:31 +08:00
committed by Peter Zijlstra
parent 8807d92270
commit 167cde7dc9

View File

@@ -2072,6 +2072,90 @@ static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
#define PEBS_LATENCY_MASK 0xffff
static inline void __setup_perf_sample_data(struct perf_event *event,
struct pt_regs *iregs,
struct perf_sample_data *data)
{
perf_sample_data_init(data, 0, event->hw.last_period);
/*
* We must however always use iregs for the unwinder to stay sane; the
* record BP,SP,IP can point into thin air when the record is from a
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
perf_sample_save_callchain(data, event, iregs);
}
/*
 * Apply the fields of the PEBS basic group to the sample.
 *
 * @ip is written into @regs and the sample is flagged as exact, @tsc is
 * handed to setup_pebs_time(), and @retire is stored in the weight only
 * when PERF_SAMPLE_WEIGHT_STRUCT was requested in @sample_type.
 */
static inline void __setup_pebs_basic_group(struct perf_event *event,
					    struct pt_regs *regs,
					    struct perf_sample_data *data,
					    u64 sample_type, u64 ip,
					    u64 tsc, u16 retire)
{
	/* The basic group reports EventingIP; mark the sample as exact. */
	set_linear_ip(regs, ip);
	regs->flags = PERF_EFLAGS_EXACT;

	setup_pebs_time(event, data, tsc);

	/* The retire value is only wanted for the weight-struct format. */
	if (!(sample_type & PERF_SAMPLE_WEIGHT_STRUCT))
		return;

	data->weight.var3_w = retire;
}
/*
 * Apply the PEBS GPR group to the sample.
 *
 * For events with precise_ip < 2 the IP from @gprs replaces the one
 * already in @regs and the exact flag is cleared. The registers in
 * @gprs are saved into @regs only when @sample_type asks for interrupt
 * or user registers.
 */
static inline void __setup_pebs_gpr_group(struct perf_event *event,
					  struct pt_regs *regs,
					  struct pebs_gprs *gprs,
					  u64 sample_type)
{
	const u64 regs_mask = PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER;

	if (event->attr.precise_ip < 2) {
		set_linear_ip(regs, gprs->ip);
		regs->flags &= ~PERF_EFLAGS_EXACT;
	}

	/* Nothing more to do unless register sampling was requested. */
	if (!(sample_type & regs_mask))
		return;

	adaptive_pebs_save_regs(regs, gprs);
}
/*
 * Apply the PEBS meminfo group to the sample.
 *
 * Depending on the bits set in @sample_type this fills in the sample
 * weight (from @latency, @instr_latency and @tsx_tuning), the data source
 * (from @aux), the address (from @address) and the transaction field
 * (from @tsx_tuning and @ax), setting the matching data->sample_flags
 * bit for each field that was written.
 */
static inline void __setup_pebs_meminfo_group(struct perf_event *event,
					      struct perf_sample_data *data,
					      u64 sample_type, u64 latency,
					      u16 instr_latency, u64 address,
					      u64 aux, u64 tsx_tuning, u64 ax)
{
	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
		/* Fallback weight, used only when the latency is zero. */
		u64 tsx_weight = intel_get_tsx_weight(tsx_tuning);
		u32 latency_lo = (u32)latency;

		data->weight.var2_w = instr_latency;

		/*
		 * Although meminfo::latency is defined as a u64, only
		 * the lower 32 bits carry valid data in practice on
		 * Ice Lake and earlier platforms.
		 */
		if (sample_type & PERF_SAMPLE_WEIGHT) {
			data->weight.full = latency ? latency : tsx_weight;
		} else {
			data->weight.var1_dw = latency_lo ? latency_lo
							  : tsx_weight;
		}

		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
	}

	if (sample_type & PERF_SAMPLE_DATA_SRC) {
		data->data_src.val = get_data_src(event, aux);
		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
	}

	/* Any of the ADDR_TYPE bits requests the address; flag it as ADDR. */
	if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
		data->addr = address;
		data->sample_flags |= PERF_SAMPLE_ADDR;
	}

	if (sample_type & PERF_SAMPLE_TRANSACTION) {
		data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
		data->sample_flags |= PERF_SAMPLE_TRANSACTION;
	}
}
/*
* With adaptive PEBS the layout depends on what fields are configured.
*/
@@ -2081,12 +2165,14 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
u64 sample_type = event->attr.sample_type;
struct pebs_basic *basic = __pebs;
void *next_record = basic + 1;
u64 sample_type, format_group;
struct pebs_meminfo *meminfo = NULL;
struct pebs_gprs *gprs = NULL;
struct x86_perf_regs *perf_regs;
u64 format_group;
u16 retire;
if (basic == NULL)
return;
@@ -2094,31 +2180,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
perf_regs = container_of(regs, struct x86_perf_regs, regs);
perf_regs->xmm_regs = NULL;
sample_type = event->attr.sample_type;
format_group = basic->format_group;
perf_sample_data_init(data, 0, event->hw.last_period);
setup_pebs_time(event, data, basic->tsc);
/*
* We must however always use iregs for the unwinder to stay sane; the
* record BP,SP,IP can point into thin air when the record is from a
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
perf_sample_save_callchain(data, event, iregs);
__setup_perf_sample_data(event, iregs, data);
*regs = *iregs;
/* The ip in basic is EventingIP */
set_linear_ip(regs, basic->ip);
regs->flags = PERF_EFLAGS_EXACT;
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
data->weight.var3_w = basic->retire_latency;
else
data->weight.var3_w = 0;
}
/* basic group */
retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
basic->retire_latency : 0;
__setup_pebs_basic_group(event, regs, data, sample_type,
basic->ip, basic->tsc, retire);
/*
* The record for MEMINFO is in front of GP
@@ -2134,54 +2206,20 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
gprs = next_record;
next_record = gprs + 1;
if (event->attr.precise_ip < 2) {
set_linear_ip(regs, gprs->ip);
regs->flags &= ~PERF_EFLAGS_EXACT;
}
if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
adaptive_pebs_save_regs(regs, gprs);
__setup_pebs_gpr_group(event, regs, gprs, sample_type);
}
if (format_group & PEBS_DATACFG_MEMINFO) {
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
meminfo->cache_latency : meminfo->mem_latency;
u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
meminfo->cache_latency : meminfo->mem_latency;
u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
meminfo->instr_latency : 0;
u64 ax = gprs ? gprs->ax : 0;
if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
data->weight.var2_w = meminfo->instr_latency;
/*
* Although meminfo::latency is defined as a u64,
* only the lower 32 bits include the valid data
* in practice on Ice Lake and earlier platforms.
*/
if (sample_type & PERF_SAMPLE_WEIGHT) {
data->weight.full = latency ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
} else {
data->weight.var1_dw = (u32)latency ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
}
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
}
if (sample_type & PERF_SAMPLE_DATA_SRC) {
data->data_src.val = get_data_src(event, meminfo->aux);
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
}
if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
data->addr = meminfo->address;
data->sample_flags |= PERF_SAMPLE_ADDR;
}
if (sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
gprs ? gprs->ax : 0);
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
}
__setup_pebs_meminfo_group(event, data, sample_type, latency,
instr_latency, meminfo->address,
meminfo->aux, meminfo->tsx_tuning,
ax);
}
if (format_group & PEBS_DATACFG_XMMS) {