Merge branches 'pm-cpuidle' and 'pm-powercap'
Merge cpuidle and power capping updates for 6.19-rc1:

 - Use residency threshold in polling state override decisions in the
   menu cpuidle governor (Aboorva Devarajan)

 - Add sanity check for exit latency and target residency in the
   cpuidle core (Rafael Wysocki)

 - Use this_cpu_ptr() where possible in the teo governor (Christian Loehle)

 - Rework the handling of tick wakeups in the teo cpuidle governor to
   increase the likelihood of stopping the scheduler tick in the cases
   when tick wakeups can be counted as non-timer ones (Rafael Wysocki)

 - Fix a reverse condition in the teo cpuidle governor and drop a
   misguided target residency check from it (Rafael Wysocki)

 - Clean up multiple minor defects in the teo cpuidle governor
   (Rafael Wysocki)

 - Update header inclusion to make it follow the Include What You Use
   principle (Andy Shevchenko)

 - Enable MSR-based RAPL PMU support in the intel_rapl power capping
   driver and arrange for using it on the Panther Lake and Wildcat Lake
   processors (Kuppuswamy Sathyanarayanan)

 - Add support for Nova Lake and Wildcat Lake processors to the
   intel_rapl power capping driver (Kaushlendra Kumar, Srinivas Pandruvada)

* pm-cpuidle:
  cpuidle: Warn instead of bailing out if target residency check fails
  cpuidle: Update header inclusion
  cpuidle: governors: teo: Add missing space to the description
  cpuidle: governors: teo: Simplify intercepts-based state lookup
  cpuidle: governors: teo: Fix tick_intercepts handling in teo_update()
  cpuidle: governors: teo: Rework the handling of tick wakeups
  cpuidle: governors: teo: Decay metrics below DECAY_SHIFT threshold
  cpuidle: governors: teo: Use s64 consistently in teo_update()
  cpuidle: governors: teo: Drop redundant function parameter
  cpuidle: governors: teo: Drop misguided target residency check
  cpuidle: teo: Use this_cpu_ptr() where possible
  cpuidle: Add sanity check for exit latency and target residency
  cpuidle: menu: Use residency threshold in polling state override decisions

* pm-powercap:
  powercap: intel_rapl: Enable MSR-based RAPL PMU support
  powercap: intel_rapl: Prepare read_raw() interface for atomic-context callers
  powercap: intel_rapl: Add support for Nova Lake processors
  powercap: intel_rapl: Add support for Wildcat Lake platform
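As a rough illustration of the teo changes in this update (not the kernel code itself), the stand-alone C sketch below models two heuristics visible in the diff: the decay helper that clears a metric once the shift no longer changes it, and the check that counts a tick wakeup as a hit on the deepest state only when tick wakeups exceed two thirds of all recorded wakeups. The constants, names and the main() driver are illustrative assumptions; only the arithmetic mirrors the patch.

#include <stdio.h>

#define DECAY_SHIFT 3   /* illustrative value for the shift-based decay */

/*
 * Decay a metric by 1/2^DECAY_SHIFT. Once the value is too small for the
 * shift to change it, clear it outright instead of letting it linger.
 */
static void decay(unsigned int *metric)
{
        unsigned int delta = *metric >> DECAY_SHIFT;

        if (delta)
                *metric -= delta;
        else
                *metric = 0;
}

/*
 * A tick wakeup is treated as a hit on the deepest idle state only when
 * tick wakeups dominate the pattern: 3 * total_tick > 2 * total, i.e. more
 * than two thirds of all recorded wakeups came from the scheduler tick.
 */
static int tick_wakeups_dominate(unsigned int total_tick, unsigned int total)
{
        return 3 * total_tick > 2 * total;
}

int main(void)
{
        unsigned int metric = 20;
        int i;

        for (i = 0; i < 12; i++) {
                decay(&metric);
                printf("step %2d: %u\n", i + 1, metric);
        }

        printf("dominate(70, 100) = %d\n", tick_wakeups_dominate(70, 100));
        printf("dominate(60, 100) = %d\n", tick_wakeups_dominate(60, 100));
        return 0;
}

With a decay shift of 3, any value below 8 is cleared in a single step rather than sticking at a small nonzero count, which is the behavior the "Decay metrics below DECAY_SHIFT threshold" change aims for.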
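On the powercap side, the read_raw() callbacks in the diff gain a bool atomic argument so the RAPL PMU event handler can read energy counters without sleeping. The stand-alone C sketch here only models that dispatch pattern; model_msr_read_raw, fake_energy_msr and current_cpu are assumed stand-in names, while the real MSR path uses rdmsrq() directly in atomic context and rdmsrq_safe_on_cpu() otherwise, as shown in the diff below.

#include <stdbool.h>
#include <stdio.h>

/* Simulated "current CPU" so the atomic-path check can be exercised. */
static int current_cpu = 0;

/* Pretend energy counter registers, indexed by CPU. */
static unsigned long long fake_energy_msr[2] = { 12345, 67890 };

/*
 * Model of the reworked MSR reader: callers in atomic context (the PMU
 * event handler) must already be on the target CPU, in which case the
 * counter is read directly; everyone else takes the path that may sleep.
 */
static int model_msr_read_raw(int cpu, unsigned long long *val, bool atomic)
{
        if (atomic) {
                if (current_cpu != cpu)
                        return -1;      /* -EIO in the driver */
                *val = fake_energy_msr[cpu];    /* direct, non-sleeping read */
                return 0;
        }

        /* sleeping path: cross-CPU safe read in the real code */
        *val = fake_energy_msr[cpu];
        return 0;
}

int main(void)
{
        unsigned long long v;

        if (!model_msr_read_raw(0, &v, true))
                printf("atomic read on current CPU: %llu\n", v);
        if (model_msr_read_raw(1, &v, true))
                printf("atomic read on another CPU correctly rejected\n");
        if (!model_msr_read_raw(1, &v, false))
                printf("sleeping read on another CPU: %llu\n", v);
        return 0;
}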
@@ -8,6 +8,8 @@
* This code is licenced under the GPL.
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/sched.h>

@@ -193,6 +195,14 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
s->exit_latency_ns = 0;
else
s->exit_latency = div_u64(s->exit_latency_ns, NSEC_PER_USEC);

/*
* Warn if the exit latency of a CPU idle state exceeds its
* target residency which is assumed to never happen in cpuidle
* in multiple places.
*/
if (s->exit_latency_ns > s->target_residency_ns)
pr_warn("Idle state %d target residency too low\n", i);
}
}
@@ -317,12 +317,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}

/*
* Use a physical idle state, not busy polling, unless a timer
* is going to trigger soon enough or the exit latency of the
* idle state in question is greater than the predicted idle
* duration.
* Use a physical idle state instead of busy polling so long as
* its target residency is below the residency threshold, its
* exit latency is not greater than the predicted idle duration,
* and the next timer doesn't expire soon.
*/
if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
s->target_residency_ns < RESIDENCY_THRESHOLD_NS &&
s->target_residency_ns <= data->next_timer_ns &&
s->exit_latency_ns <= predicted_ns) {
predicted_ns = s->target_residency_ns;
@@ -76,7 +76,7 @@
* likely woken up by a non-timer wakeup source).
*
* 2. If the second sum computed in step 1 is greater than a half of the sum of
* both metrics for the candidate state bin and all subsequent bins(if any),
* both metrics for the candidate state bin and all subsequent bins (if any),
* a shallower idle state is likely to be more suitable, so look for it.
*
* - Traverse the enabled idle states shallower than the candidate one in the

@@ -133,21 +133,33 @@ struct teo_bin {
* @sleep_length_ns: Time till the closest timer event (at the selection time).
* @state_bins: Idle state data bins for this CPU.
* @total: Grand total of the "intercepts" and "hits" metrics for all bins.
* @total_tick: Wakeups by the scheduler tick.
* @tick_intercepts: "Intercepts" before TICK_NSEC.
* @short_idles: Wakeups after short idle periods.
* @artificial_wakeup: Set if the wakeup has been triggered by a safety net.
* @tick_wakeup: Set if the last wakeup was by the scheduler tick.
*/
struct teo_cpu {
s64 sleep_length_ns;
struct teo_bin state_bins[CPUIDLE_STATE_MAX];
unsigned int total;
unsigned int total_tick;
unsigned int tick_intercepts;
unsigned int short_idles;
bool artificial_wakeup;
bool tick_wakeup;
};

static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);

static void teo_decay(unsigned int *metric)
{
unsigned int delta = *metric >> DECAY_SHIFT;

if (delta)
*metric -= delta;
else
*metric = 0;
}

/**
* teo_update - Update CPU metrics after wakeup.
* @drv: cpuidle driver containing state data.

@@ -155,21 +167,22 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
*/
static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
struct teo_cpu *cpu_data = this_cpu_ptr(&teo_cpus);
int i, idx_timer = 0, idx_duration = 0;
s64 target_residency_ns;
u64 measured_ns;
s64 target_residency_ns, measured_ns;
unsigned int total = 0;

cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
teo_decay(&cpu_data->short_idles);

if (cpu_data->artificial_wakeup) {
if (dev->poll_time_limit) {
dev->poll_time_limit = false;
/*
* If one of the safety nets has triggered, assume that this
* Polling state timeout has triggered, so assume that this
* might have been a long sleep.
*/
measured_ns = U64_MAX;
measured_ns = S64_MAX;
} else {
u64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;
s64 lat_ns = drv->states[dev->last_state_idx].exit_latency_ns;

measured_ns = dev->last_residency_ns;
/*

@@ -196,8 +209,10 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
for (i = 0; i < drv->state_count; i++) {
struct teo_bin *bin = &cpu_data->state_bins[i];

bin->hits -= bin->hits >> DECAY_SHIFT;
bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
teo_decay(&bin->hits);
total += bin->hits;
teo_decay(&bin->intercepts);
total += bin->intercepts;

target_residency_ns = drv->states[i].target_residency_ns;

@@ -208,7 +223,24 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
}
}

cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
cpu_data->total = total + PULSE;

teo_decay(&cpu_data->tick_intercepts);

teo_decay(&cpu_data->total_tick);
if (cpu_data->tick_wakeup) {
cpu_data->total_tick += PULSE;
/*
* If tick wakeups dominate the wakeup pattern, count this one
* as a hit on the deepest available idle state to increase the
* likelihood of stopping the tick.
*/
if (3 * cpu_data->total_tick > 2 * cpu_data->total) {
cpu_data->state_bins[drv->state_count-1].hits += PULSE;
return;
}
}

/*
* If the measured idle duration falls into the same bin as the sleep
* length, this is a "hit", so update the "hits" metric for that bin.

@@ -219,18 +251,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
cpu_data->state_bins[idx_timer].hits += PULSE;
} else {
cpu_data->state_bins[idx_duration].intercepts += PULSE;
if (TICK_NSEC <= measured_ns)
if (measured_ns <= TICK_NSEC)
cpu_data->tick_intercepts += PULSE;
}

cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
cpu_data->total += PULSE;
}

static bool teo_state_ok(int i, struct cpuidle_driver *drv)
{
return !tick_nohz_tick_stopped() ||
drv->states[i].target_residency_ns >= TICK_NSEC;
}

/**

@@ -239,17 +262,15 @@ static bool teo_state_ok(int i, struct cpuidle_driver *drv)
* @dev: Target CPU.
* @state_idx: Index of the capping idle state.
* @duration_ns: Idle duration value to match.
* @no_poll: Don't consider polling states.
*/
static int teo_find_shallower_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int state_idx,
s64 duration_ns, bool no_poll)
s64 duration_ns)
{
int i;

for (i = state_idx - 1; i >= 0; i--) {
if (dev->states_usage[i].disable ||
(no_poll && drv->states[i].flags & CPUIDLE_FLAG_POLLING))
if (dev->states_usage[i].disable)
continue;

state_idx = i;

@@ -268,7 +289,7 @@ static int teo_find_shallower_state(struct cpuidle_driver *drv,
static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
bool *stop_tick)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
struct teo_cpu *cpu_data = this_cpu_ptr(&teo_cpus);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
ktime_t delta_tick = TICK_NSEC / 2;
unsigned int idx_intercept_sum = 0;

@@ -356,7 +377,18 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* better choice.
*/
if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
int first_suitable_idx = idx;
int min_idx = idx0;

if (tick_nohz_tick_stopped()) {
/*
* Look for the shallowest idle state below the current
* candidate one whose target residency is at least
* equal to the tick period length.
*/
while (min_idx < idx &&
drv->states[min_idx].target_residency_ns < TICK_NSEC)
min_idx++;
}

/*
* Look for the deepest idle state whose target residency had

@@ -366,49 +398,14 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* Take the possible duration limitation present if the tick
* has been stopped already into account.
*/
intercept_sum = 0;

for (i = idx - 1; i >= 0; i--) {
struct teo_bin *bin = &cpu_data->state_bins[i];

intercept_sum += bin->intercepts;

if (2 * intercept_sum > idx_intercept_sum) {
/*
* Use the current state unless it is too
* shallow or disabled, in which case take the
* first enabled state that is deep enough.
*/
if (teo_state_ok(i, drv) &&
!dev->states_usage[i].disable) {
idx = i;
break;
}
idx = first_suitable_idx;
break;
}
for (i = idx - 1, intercept_sum = 0; i >= min_idx; i--) {
intercept_sum += cpu_data->state_bins[i].intercepts;

if (dev->states_usage[i].disable)
continue;

if (teo_state_ok(i, drv)) {
/*
* The current state is deep enough, but still
* there may be a better one.
*/
first_suitable_idx = i;
continue;
}

/*
* The current state is too shallow, so if no suitable
* states other than the initial candidate have been
* found, give up (the remaining states to check are
* shallower still), but otherwise the first suitable
* state other than the initial candidate may turn out
* to be preferable.
*/
if (first_suitable_idx == idx)
idx = i;
if (2 * intercept_sum > idx_intercept_sum)
break;
}
}

@@ -458,11 +455,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* If the closest expected timer is before the target residency of the
* candidate state, a shallower one needs to be found.
*/
if (drv->states[idx].target_residency_ns > duration_ns) {
i = teo_find_shallower_state(drv, dev, idx, duration_ns, false);
if (teo_state_ok(i, drv))
idx = i;
}
if (drv->states[idx].target_residency_ns > duration_ns)
idx = teo_find_shallower_state(drv, dev, idx, duration_ns);

/*
* If the selected state's target residency is below the tick length

@@ -490,7 +484,7 @@ end:
*/
if (idx > idx0 &&
drv->states[idx].target_residency_ns > delta_tick)
idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
idx = teo_find_shallower_state(drv, dev, idx, delta_tick);

out_tick:
*stop_tick = false;

@@ -504,20 +498,11 @@ out_tick:
*/
static void teo_reflect(struct cpuidle_device *dev, int state)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
struct teo_cpu *cpu_data = this_cpu_ptr(&teo_cpus);

cpu_data->tick_wakeup = tick_nohz_idle_got_tick();

dev->last_state_idx = state;
if (dev->poll_time_limit ||
(tick_nohz_idle_got_tick() && cpu_data->sleep_length_ns > TICK_NSEC)) {
/*
* The wakeup was not "genuine", but triggered by one of the
* safety nets.
*/
dev->poll_time_limit = false;
cpu_data->artificial_wakeup = true;
} else {
cpu_data->artificial_wakeup = false;
}
}

/**
@@ -4,9 +4,13 @@
*/

#include <linux/cpuidle.h>
#include <linux/export.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/idle.h>
#include <linux/sprintf.h>
#include <linux/types.h>

#define POLL_IDLE_RELAX_COUNT 200
@@ -253,7 +253,8 @@ struct rapl_primitive_info {
static void rapl_init_domains(struct rapl_package *rp);
static int rapl_read_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
bool xlate, u64 *data);
bool xlate, u64 *data,
bool atomic);
static int rapl_write_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
unsigned long long value);

@@ -289,7 +290,7 @@ static int get_energy_counter(struct powercap_zone *power_zone,
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);

if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now)) {
if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now, false)) {
*energy_raw = energy_now;
cpus_read_unlock();

@@ -830,7 +831,8 @@ prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
* 63-------------------------- 31--------------------------- 0
*/
static int rapl_read_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim, bool xlate, u64 *data)
enum rapl_primitives prim, bool xlate, u64 *data,
bool atomic)
{
u64 value;
enum rapl_primitives prim_fixed = prim_fixups(rd, prim);

@@ -852,7 +854,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd,

ra.mask = rpi->mask;

if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, atomic)) {
pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name);
return -EIO;
}

@@ -904,7 +906,7 @@ static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
if (!is_pl_valid(rd, pl))
return -EINVAL;

return rapl_read_data_raw(rd, prim, xlate, data);
return rapl_read_data_raw(rd, prim, xlate, data, false);
}

static int rapl_write_pl_data(struct rapl_domain *rd, int pl,

@@ -941,7 +943,7 @@ static int rapl_check_unit_core(struct rapl_domain *rd)

ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
ra.reg.val, rd->rp->name, rd->name);
return -ENODEV;

@@ -969,7 +971,7 @@ static int rapl_check_unit_atom(struct rapl_domain *rd)

ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
ra.reg.val, rd->rp->name, rd->name);
return -ENODEV;

@@ -1156,7 +1158,7 @@ static int rapl_check_unit_tpmi(struct rapl_domain *rd)

ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
ra.reg.val, rd->rp->name, rd->name);
return -ENODEV;

@@ -1284,6 +1286,9 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &rapl_defaults_spr_server),
X86_MATCH_VFM(INTEL_LUNARLAKE_M, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_WILDCATLAKE_L, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_NOVALAKE, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_NOVALAKE_L, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_ARROWLAKE_H, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_ARROWLAKE, &rapl_defaults_core),
X86_MATCH_VFM(INTEL_ARROWLAKE_U, &rapl_defaults_core),

@@ -1325,7 +1330,7 @@ static void rapl_update_domain_data(struct rapl_package *rp)
struct rapl_primitive_info *rpi = get_rpi(rp, prim);

if (!rapl_read_data_raw(&rp->domains[dmn], prim,
rpi->unit, &val))
rpi->unit, &val, false))
rp->domains[dmn].rdd.primitives[prim] = val;
}
}

@@ -1425,7 +1430,7 @@ static int rapl_check_domain(int domain, struct rapl_package *rp)
*/

ra.mask = ENERGY_STATUS_MASK;
if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value)
if (rp->priv->read_raw(get_rid(rp), &ra, false) || !ra.value)
return -ENODEV;

return 0;

@@ -1592,11 +1597,11 @@ static int get_pmu_cpu(struct rapl_package *rp)
if (!rp->has_pmu)
return nr_cpu_ids;

/* Only TPMI RAPL is supported for now */
if (rp->priv->type != RAPL_IF_TPMI)
/* Only TPMI & MSR RAPL are supported for now */
if (rp->priv->type != RAPL_IF_TPMI && rp->priv->type != RAPL_IF_MSR)
return nr_cpu_ids;

/* TPMI RAPL uses any CPU in the package for PMU */
/* TPMI/MSR RAPL uses any CPU in the package for PMU */
for_each_online_cpu(cpu)
if (topology_physical_package_id(cpu) == rp->id)
return cpu;

@@ -1609,11 +1614,11 @@ static bool is_rp_pmu_cpu(struct rapl_package *rp, int cpu)
if (!rp->has_pmu)
return false;

/* Only TPMI RAPL is supported for now */
if (rp->priv->type != RAPL_IF_TPMI)
/* Only TPMI & MSR RAPL are supported for now */
if (rp->priv->type != RAPL_IF_TPMI && rp->priv->type != RAPL_IF_MSR)
return false;

/* TPMI RAPL uses any CPU in the package for PMU */
/* TPMI/MSR RAPL uses any CPU in the package for PMU */
return topology_physical_package_id(cpu) == rp->id;
}

@@ -1636,7 +1641,7 @@ static u64 event_read_counter(struct perf_event *event)
if (event->hw.idx < 0)
return 0;

ret = rapl_read_data_raw(&rp->domains[event->hw.idx], ENERGY_COUNTER, false, &val);
ret = rapl_read_data_raw(&rp->domains[event->hw.idx], ENERGY_COUNTER, false, &val, true);

/* Return 0 for failed read */
if (ret)
@@ -33,6 +33,8 @@
/* private data for RAPL MSR Interface */
static struct rapl_if_priv *rapl_msr_priv;

static bool rapl_msr_pmu __ro_after_init;

static struct rapl_if_priv rapl_msr_priv_intel = {
.type = RAPL_IF_MSR,
.reg_unit.msr = MSR_RAPL_POWER_UNIT,

@@ -79,6 +81,8 @@ static int rapl_cpu_online(unsigned int cpu)
rp = rapl_add_package_cpuslocked(cpu, rapl_msr_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
if (rapl_msr_pmu)
rapl_package_add_pmu(rp);
}
cpumask_set_cpu(cpu, &rp->cpumask);
return 0;

@@ -95,19 +99,37 @@ static int rapl_cpu_down_prep(unsigned int cpu)

cpumask_clear_cpu(cpu, &rp->cpumask);
lead_cpu = cpumask_first(&rp->cpumask);
if (lead_cpu >= nr_cpu_ids)
if (lead_cpu >= nr_cpu_ids) {
if (rapl_msr_pmu)
rapl_package_remove_pmu(rp);
rapl_remove_package_cpuslocked(rp);
else if (rp->lead_cpu == cpu)
} else if (rp->lead_cpu == cpu) {
rp->lead_cpu = lead_cpu;
}

return 0;
}

static int rapl_msr_read_raw(int cpu, struct reg_action *ra)
static int rapl_msr_read_raw(int cpu, struct reg_action *ra, bool atomic)
{
/*
* When called from atomic-context (eg PMU event handler)
* perform MSR read directly using rdmsrq().
*/
if (atomic) {
if (unlikely(smp_processor_id() != cpu))
return -EIO;

rdmsrq(ra->reg.msr, ra->value);
goto out;
}

if (rdmsrq_safe_on_cpu(cpu, ra->reg.msr, &ra->value)) {
pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg.msr, cpu);
return -EIO;
}

out:
ra->value &= ra->mask;
return 0;
}

@@ -151,6 +173,16 @@ static const struct x86_cpu_id pl4_support_ids[] = {
X86_MATCH_VFM(INTEL_ARROWLAKE_U, NULL),
X86_MATCH_VFM(INTEL_ARROWLAKE_H, NULL),
X86_MATCH_VFM(INTEL_PANTHERLAKE_L, NULL),
X86_MATCH_VFM(INTEL_WILDCATLAKE_L, NULL),
X86_MATCH_VFM(INTEL_NOVALAKE, NULL),
X86_MATCH_VFM(INTEL_NOVALAKE_L, NULL),
{}
};

/* List of MSR-based RAPL PMU support CPUs */
static const struct x86_cpu_id pmu_support_ids[] = {
X86_MATCH_VFM(INTEL_PANTHERLAKE_L, NULL),
X86_MATCH_VFM(INTEL_WILDCATLAKE_L, NULL),
{}
};

@@ -181,6 +213,11 @@ static int rapl_msr_probe(struct platform_device *pdev)
pr_info("PL4 support detected.\n");
}

if (x86_match_cpu(pmu_support_ids)) {
rapl_msr_pmu = true;
pr_info("MSR-based RAPL PMU support enabled\n");
}

rapl_msr_priv->control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
if (IS_ERR(rapl_msr_priv->control_type)) {
pr_debug("failed to register powercap control_type.\n");
@@ -60,7 +60,7 @@ static DEFINE_MUTEX(tpmi_rapl_lock);

static struct powercap_control_type *tpmi_control_type;

static int tpmi_rapl_read_raw(int id, struct reg_action *ra)
static int tpmi_rapl_read_raw(int id, struct reg_action *ra, bool atomic)
{
if (!ra->reg.mmio)
return -EINVAL;
@@ -19,7 +19,7 @@ static const struct rapl_mmio_regs rapl_mmio_default = {
.limits[RAPL_DOMAIN_DRAM] = BIT(POWER_LIMIT2),
};

static int rapl_mmio_read_raw(int cpu, struct reg_action *ra)
static int rapl_mmio_read_raw(int cpu, struct reg_action *ra, bool atomic)
{
if (!ra->reg.mmio)
return -EINVAL;
@@ -152,7 +152,7 @@ struct rapl_if_priv {
union rapl_reg reg_unit;
union rapl_reg regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
int limits[RAPL_DOMAIN_MAX];
int (*read_raw)(int id, struct reg_action *ra);
int (*read_raw)(int id, struct reg_action *ra, bool atomic);
int (*write_raw)(int id, struct reg_action *ra);
void *defaults;
void *rpi;