Mirror of https://github.com/torvalds/linux.git, synced 2025-12-07 20:06:24 +00:00
perf/x86/intel/ds: Factor out PEBS group processing code to functions
Adaptive PEBS and arch-PEBS share a lot of code for processing the PEBS record groups, such as the basic, GPR and meminfo groups. Factor this shared code out into generic helper functions to avoid duplication.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-8-dapeng1.mi@linux.intel.com
committed by Peter Zijlstra
parent 8807d92270
commit 167cde7dc9
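Before the diff itself, here is a minimal, self-contained sketch of the pattern this patch applies: two record-format parsers delegate their shared "group" handling to one helper instead of open-coding it twice. This is illustrative only, not kernel code; all names, types, and record layouts below are hypothetical.

/* Minimal, self-contained sketch of the refactor pattern (NOT kernel
 * code): two record-format parsers share one group-processing helper
 * instead of each open-coding it. All names here are hypothetical. */
#include <stdint.h>
#include <stdio.h>

struct sample_data {
	uint64_t ip;		/* sampled instruction pointer */
	unsigned retire;	/* retire latency, 0 if unsupported */
};

/* Shared helper, analogous in spirit to __setup_pebs_basic_group():
 * both layouts funnel their "basic group" fields through here. */
static void setup_basic_group(struct sample_data *data,
			      uint64_t ip, unsigned retire)
{
	data->ip = ip;
	data->retire = retire;
}

/* Parser for one hypothetical record layout: ip first, latency second. */
static void parse_layout_a(struct sample_data *data, const uint64_t *rec)
{
	setup_basic_group(data, rec[0], (unsigned)rec[1]);
}

/* Parser for a second hypothetical layout: latency first, ip second.
 * Different framing, same shared group handling. */
static void parse_layout_b(struct sample_data *data, const uint64_t *rec)
{
	setup_basic_group(data, rec[1], (unsigned)rec[0]);
}

int main(void)
{
	const uint64_t rec_a[] = { 0x401000, 12 };
	const uint64_t rec_b[] = { 7, 0x402000 };
	struct sample_data d;

	parse_layout_a(&d, rec_a);
	printf("layout A: ip=%#llx retire=%u\n",
	       (unsigned long long)d.ip, d.retire);
	parse_layout_b(&d, rec_b);
	printf("layout B: ip=%#llx retire=%u\n",
	       (unsigned long long)d.ip, d.retire);
	return 0;
}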
@@ -2072,6 +2072,90 @@ static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
 
 #define PEBS_LATENCY_MASK 0xffff
 
+static inline void __setup_perf_sample_data(struct perf_event *event,
+					    struct pt_regs *iregs,
+					    struct perf_sample_data *data)
+{
+	perf_sample_data_init(data, 0, event->hw.last_period);
+
+	/*
+	 * We must however always use iregs for the unwinder to stay sane; the
+	 * record BP,SP,IP can point into thin air when the record is from a
+	 * previous PMI context or an (I)RET happened between the record and
+	 * PMI.
+	 */
+	perf_sample_save_callchain(data, event, iregs);
+}
+
+static inline void __setup_pebs_basic_group(struct perf_event *event,
+					    struct pt_regs *regs,
+					    struct perf_sample_data *data,
+					    u64 sample_type, u64 ip,
+					    u64 tsc, u16 retire)
+{
+	/* The ip in basic is EventingIP */
+	set_linear_ip(regs, ip);
+	regs->flags = PERF_EFLAGS_EXACT;
+	setup_pebs_time(event, data, tsc);
+
+	if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+		data->weight.var3_w = retire;
+}
+
+static inline void __setup_pebs_gpr_group(struct perf_event *event,
+					  struct pt_regs *regs,
+					  struct pebs_gprs *gprs,
+					  u64 sample_type)
+{
+	if (event->attr.precise_ip < 2) {
+		set_linear_ip(regs, gprs->ip);
+		regs->flags &= ~PERF_EFLAGS_EXACT;
+	}
+
+	if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
+		adaptive_pebs_save_regs(regs, gprs);
+}
+
+static inline void __setup_pebs_meminfo_group(struct perf_event *event,
+					      struct perf_sample_data *data,
+					      u64 sample_type, u64 latency,
+					      u16 instr_latency, u64 address,
+					      u64 aux, u64 tsx_tuning, u64 ax)
+{
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+		u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);
+
+		data->weight.var2_w = instr_latency;
+
+		/*
+		 * Although meminfo::latency is defined as a u64,
+		 * only the lower 32 bits include the valid data
+		 * in practice on Ice Lake and earlier platforms.
+		 */
+		if (sample_type & PERF_SAMPLE_WEIGHT)
+			data->weight.full = latency ?: tsx_latency;
+		else
+			data->weight.var1_dw = (u32)latency ?: tsx_latency;
+
+		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+	}
+
+	if (sample_type & PERF_SAMPLE_DATA_SRC) {
+		data->data_src.val = get_data_src(event, aux);
+		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
+		data->addr = address;
+		data->sample_flags |= PERF_SAMPLE_ADDR;
+	}
+
+	if (sample_type & PERF_SAMPLE_TRANSACTION) {
+		data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
+		data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+	}
+}
+
 /*
  * With adaptive PEBS the layout depends on what fields are configured.
  */
@@ -2081,12 +2165,14 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 					    struct pt_regs *regs)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	u64 sample_type = event->attr.sample_type;
 	struct pebs_basic *basic = __pebs;
 	void *next_record = basic + 1;
+	u64 sample_type, format_group;
 	struct pebs_meminfo *meminfo = NULL;
 	struct pebs_gprs *gprs = NULL;
 	struct x86_perf_regs *perf_regs;
-	u64 format_group;
+	u16 retire;
 
 	if (basic == NULL)
 		return;
@@ -2094,31 +2180,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 	perf_regs = container_of(regs, struct x86_perf_regs, regs);
 	perf_regs->xmm_regs = NULL;
 
+	sample_type = event->attr.sample_type;
 	format_group = basic->format_group;
-	perf_sample_data_init(data, 0, event->hw.last_period);
-
-	setup_pebs_time(event, data, basic->tsc);
-
-	/*
-	 * We must however always use iregs for the unwinder to stay sane; the
-	 * record BP,SP,IP can point into thin air when the record is from a
-	 * previous PMI context or an (I)RET happened between the record and
-	 * PMI.
-	 */
-	perf_sample_save_callchain(data, event, iregs);
+	__setup_perf_sample_data(event, iregs, data);
 
 	*regs = *iregs;
-	/* The ip in basic is EventingIP */
-	set_linear_ip(regs, basic->ip);
-	regs->flags = PERF_EFLAGS_EXACT;
 
-	if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
-		if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
-			data->weight.var3_w = basic->retire_latency;
-		else
-			data->weight.var3_w = 0;
-	}
+	/* basic group */
+	retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
+		 basic->retire_latency : 0;
+
+	__setup_pebs_basic_group(event, regs, data, sample_type,
+				 basic->ip, basic->tsc, retire);
 
 	/*
 	 * The record for MEMINFO is in front of GP
@@ -2134,54 +2206,20 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 		gprs = next_record;
 		next_record = gprs + 1;
 
-		if (event->attr.precise_ip < 2) {
-			set_linear_ip(regs, gprs->ip);
-			regs->flags &= ~PERF_EFLAGS_EXACT;
-		}
-
-		if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
-			adaptive_pebs_save_regs(regs, gprs);
+		__setup_pebs_gpr_group(event, regs, gprs, sample_type);
 	}
 
 	if (format_group & PEBS_DATACFG_MEMINFO) {
-		if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
-			u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
-					meminfo->cache_latency : meminfo->mem_latency;
-
-			if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
-				data->weight.var2_w = meminfo->instr_latency;
-
-			/*
-			 * Although meminfo::latency is defined as a u64,
-			 * only the lower 32 bits include the valid data
-			 * in practice on Ice Lake and earlier platforms.
-			 */
-			if (sample_type & PERF_SAMPLE_WEIGHT) {
-				data->weight.full = latency ?:
-					intel_get_tsx_weight(meminfo->tsx_tuning);
-			} else {
-				data->weight.var1_dw = (u32)latency ?:
-					intel_get_tsx_weight(meminfo->tsx_tuning);
-			}
-
-			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
-		}
-
-		if (sample_type & PERF_SAMPLE_DATA_SRC) {
-			data->data_src.val = get_data_src(event, meminfo->aux);
-			data->sample_flags |= PERF_SAMPLE_DATA_SRC;
-		}
-
-		if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
-			data->addr = meminfo->address;
-			data->sample_flags |= PERF_SAMPLE_ADDR;
-		}
-
-		if (sample_type & PERF_SAMPLE_TRANSACTION) {
-			data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
-							      gprs ? gprs->ax : 0);
-			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
-		}
+		u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+			      meminfo->cache_latency : meminfo->mem_latency;
+		u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+				    meminfo->instr_latency : 0;
+		u64 ax = gprs ? gprs->ax : 0;
+
+		__setup_pebs_meminfo_group(event, data, sample_type, latency,
+					   instr_latency, meminfo->address,
+					   meminfo->aux, meminfo->tsx_tuning,
+					   ax);
 	}
 
 	if (format_group & PEBS_DATACFG_XMMS) {
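A note on reuse: once the group handling lives in these helpers, a second record parser, such as the arch-PEBS path the commit message refers to, can call the same functions instead of duplicating the logic. A hedged sketch only; the surrounding arch-PEBS function and its locals are assumed here, not shown in this diff:

	/* Hypothetical arch-PEBS caller; the locals (regs, sample_type,
	 * basic, retire) are assumed to be set up as in the adaptive path. */
	__setup_perf_sample_data(event, iregs, data);
	__setup_pebs_basic_group(event, regs, data, sample_type,
				 basic->ip, basic->tsc, retire);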