Merge tag 'drm-xe-fixes-2025-07-03' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes:
- Fix chunking the PTE updates and overflowing the maximum number of
  dwords with MI_STORE_DATA_IMM (Jia Yao)
- Move WA BB to the LRC BO to mitigate hangs on context switch (Matthew Brost)
- Fix frequency/flush WAs for BMG (Vinay / Lucas)
- Fix kconfig prompt title and description (Lucas)
- Do not require kunit (Harry Austen / Lucas)
- Extend 14018094691 WA to BMG (Daniele)
- Fix wedging the device on signal (Matthew Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/o5662wz6nrlf6xt5sjgxq5oe6qoujefzywuwblm3m626hreifv@foqayqydd6ig
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_XE
-	tristate "Intel Xe Graphics"
-	depends on DRM && PCI && (m || (y && KUNIT=y))
+	tristate "Intel Xe2 Graphics"
+	depends on DRM && PCI
+	depends on KUNIT || !KUNIT
 	depends on INTEL_VSEC || !INTEL_VSEC
 	depends on X86_PLATFORM_DEVICES || !(X86 && ACPI)
 	select INTERVAL_TREE
@@ -46,7 +47,8 @@ config DRM_XE
 	select AUXILIARY_BUS
 	select HMM_MIRROR
 	help
-	  Experimental driver for Intel Xe series GPUs
+	  Driver for Intel Xe2 series GPUs and later. Experimental support
+	  for Xe series is also available.
 
 	  If "M" is selected, the module will be called xe.
 
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -40,6 +40,7 @@
 #include "xe_gt_printk.h"
 #include "xe_gt_sriov_vf.h"
 #include "xe_guc.h"
+#include "xe_guc_pc.h"
 #include "xe_hw_engine_group.h"
 #include "xe_hwmon.h"
 #include "xe_irq.h"
@@ -986,38 +987,15 @@ void xe_device_wmb(struct xe_device *xe)
 	xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
 }
 
-/**
- * xe_device_td_flush() - Flush transient L3 cache entries
- * @xe: The device
- *
- * Display engine has direct access to memory and is never coherent with L3/L4
- * caches (or CPU caches), however KMD is responsible for specifically flushing
- * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
- * can happen from such a surface without seeing corruption.
- *
- * Display surfaces can be tagged as transient by mapping it using one of the
- * various L3:XD PAT index modes on Xe2.
- *
- * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
- * at the end of each submission via PIPE_CONTROL for compute/render, since SA
- * Media is not coherent with L3 and we want to support render-vs-media
- * usescases. For other engines like copy/blt the HW internally forces uncached
- * behaviour, hence why we can skip the TDF on such platforms.
+/*
+ * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt.
  */
-void xe_device_td_flush(struct xe_device *xe)
+static void tdf_request_sync(struct xe_device *xe)
 {
-	struct xe_gt *gt;
 	unsigned int fw_ref;
+	struct xe_gt *gt;
 	u8 id;
 
-	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
-		return;
-
-	if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
-		xe_device_l2_flush(xe);
-		return;
-	}
-
 	for_each_gt(gt, xe, id) {
 		if (xe_gt_is_media_type(gt))
 			continue;
@@ -1027,6 +1005,7 @@ void xe_device_td_flush(struct xe_device *xe)
 			return;
 
 		xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+
 		/*
 		 * FIXME: We can likely do better here with our choice of
 		 * timeout. Currently we just assume the worst case, i.e. 150us,
@@ -1057,15 +1036,52 @@ void xe_device_l2_flush(struct xe_device *xe)
 		return;
 
 	spin_lock(&gt->global_invl_lock);
-	xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
 
+	xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
 	if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
 		xe_gt_err_once(gt, "Global invalidation timeout\n");
+
 	spin_unlock(&gt->global_invl_lock);
 
 	xe_force_wake_put(gt_to_fw(gt), fw_ref);
 }
 
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping it using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * usescases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence why we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+	struct xe_gt *root_gt;
+
+	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+		return;
+
+	root_gt = xe_root_mmio_gt(xe);
+	if (XE_WA(root_gt, 16023588340)) {
+		/* A transient flush is not sufficient: flush the L2 */
+		xe_device_l2_flush(xe);
+	} else {
+		xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
+		tdf_request_sync(xe);
+		xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
+	}
+}
+
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 {
 	return xe_device_has_flat_ccs(xe) ?
--- a/drivers/gpu/drm/xe/xe_drv.h
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -9,7 +9,7 @@
 #include <drm/drm_drv.h>
 
 #define DRIVER_NAME		"xe"
-#define DRIVER_DESC		"Intel Xe Graphics"
+#define DRIVER_DESC		"Intel Xe2 Graphics"
 
 /* Interface history:
  *
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -5,8 +5,11 @@
 
 #include "xe_guc_pc.h"
 
+#include <linux/cleanup.h>
 #include <linux/delay.h>
+#include <linux/jiffies.h>
 #include <linux/ktime.h>
+#include <linux/wait_bit.h>
 
 #include <drm/drm_managed.h>
 #include <drm/drm_print.h>
@@ -51,9 +54,12 @@
 
 #define LNL_MERT_FREQ_CAP	800
 #define BMG_MERT_FREQ_CAP	2133
+#define BMG_MIN_FREQ		1200
+#define BMG_MERT_FLUSH_FREQ_CAP	2600
 
 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
 #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
+#define SLPC_ACT_FREQ_TIMEOUT_MS 100
 
 /**
  * DOC: GuC Power Conservation (PC)
@@ -141,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
 	return -ETIMEDOUT;
 }
 
+static int wait_for_flush_complete(struct xe_guc_pc *pc)
+{
+	const unsigned long timeout = msecs_to_jiffies(30);
+
+	if (!wait_var_event_timeout(&pc->flush_freq_limit,
+				    !atomic_read(&pc->flush_freq_limit),
+				    timeout))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
+{
+	int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
+	int slept, wait = 10;
+
+	for (slept = 0; slept < timeout_us;) {
+		if (xe_guc_pc_get_act_freq(pc) <= freq)
+			return 0;
+
+		usleep_range(wait, wait << 1);
+		slept += wait;
+		wait <<= 1;
+		if (slept + wait > timeout_us)
+			wait = timeout_us - slept;
+	}
+
+	return -ETIMEDOUT;
+}
 static int pc_action_reset(struct xe_guc_pc *pc)
 {
 	struct xe_guc_ct *ct = pc_to_ct(pc);
@@ -553,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
 	return pc->rpn_freq;
 }
 
+static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
+	int ret;
+
+	lockdep_assert_held(&pc->freq_lock);
+
+	/* Might be in the middle of a gt reset */
+	if (!pc->freq_ready)
+		return -EAGAIN;
+
+	ret = pc_action_query_task_state(pc);
+	if (ret)
+		return ret;
+
+	*freq = pc_get_min_freq(pc);
+
+	return 0;
+}
+
 /**
  * xe_guc_pc_get_min_freq - Get the min operational frequency
  * @pc: The GuC PC
@@ -562,27 +617,29 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
  * -EAGAIN if GuC PC not ready (likely in middle of a reset).
  */
 int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
 {
+	guard(mutex)(&pc->freq_lock);
+
+	return xe_guc_pc_get_min_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
 	int ret;
 
-	xe_device_assert_mem_access(pc_to_xe(pc));
+	lockdep_assert_held(&pc->freq_lock);
 
-	mutex_lock(&pc->freq_lock);
-	if (!pc->freq_ready) {
-		/* Might be in the middle of a gt reset */
-		ret = -EAGAIN;
-		goto out;
-	}
+	/* Might be in the middle of a gt reset */
+	if (!pc->freq_ready)
+		return -EAGAIN;
 
-	ret = pc_action_query_task_state(pc);
+	ret = pc_set_min_freq(pc, freq);
 	if (ret)
-		goto out;
+		return ret;
 
-	*freq = pc_get_min_freq(pc);
+	pc->user_requested_min = freq;
 
-out:
-	mutex_unlock(&pc->freq_lock);
-	return ret;
+	return 0;
 }
 
 /**
@@ -595,25 +652,29 @@ out:
  * -EINVAL if value out of bounds.
  */
 int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
 {
+	guard(mutex)(&pc->freq_lock);
+
+	return xe_guc_pc_set_min_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
 	int ret;
 
-	mutex_lock(&pc->freq_lock);
-	if (!pc->freq_ready) {
-		/* Might be in the middle of a gt reset */
-		ret = -EAGAIN;
-		goto out;
-	}
+	lockdep_assert_held(&pc->freq_lock);
 
-	ret = pc_set_min_freq(pc, freq);
+	/* Might be in the middle of a gt reset */
+	if (!pc->freq_ready)
+		return -EAGAIN;
+
+	ret = pc_action_query_task_state(pc);
 	if (ret)
-		goto out;
+		return ret;
 
-	pc->user_requested_min = freq;
+	*freq = pc_get_max_freq(pc);
 
-out:
-	mutex_unlock(&pc->freq_lock);
-	return ret;
+	return 0;
 }
 
 /**
@@ -625,25 +686,29 @@ out:
  * -EAGAIN if GuC PC not ready (likely in middle of a reset).
  */
 int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
 {
+	guard(mutex)(&pc->freq_lock);
+
+	return xe_guc_pc_get_max_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
 	int ret;
 
-	mutex_lock(&pc->freq_lock);
-	if (!pc->freq_ready) {
-		/* Might be in the middle of a gt reset */
-		ret = -EAGAIN;
-		goto out;
-	}
+	lockdep_assert_held(&pc->freq_lock);
 
-	ret = pc_action_query_task_state(pc);
+	/* Might be in the middle of a gt reset */
+	if (!pc->freq_ready)
+		return -EAGAIN;
+
+	ret = pc_set_max_freq(pc, freq);
 	if (ret)
-		goto out;
+		return ret;
 
-	*freq = pc_get_max_freq(pc);
+	pc->user_requested_max = freq;
 
-out:
-	mutex_unlock(&pc->freq_lock);
-	return ret;
+	return 0;
 }
 
 /**
@@ -657,24 +722,14 @@ out:
  */
 int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
 {
-	int ret;
-
-	mutex_lock(&pc->freq_lock);
-	if (!pc->freq_ready) {
-		/* Might be in the middle of a gt reset */
-		ret = -EAGAIN;
-		goto out;
+	if (XE_WA(pc_to_gt(pc), 22019338487)) {
+		if (wait_for_flush_complete(pc) != 0)
+			return -EAGAIN;
 	}
 
-	ret = pc_set_max_freq(pc, freq);
-	if (ret)
-		goto out;
+	guard(mutex)(&pc->freq_lock);
 
-	pc->user_requested_max = freq;
-
-out:
-	mutex_unlock(&pc->freq_lock);
-	return ret;
+	return xe_guc_pc_set_max_freq_locked(pc, freq);
 }
 
 /**
@@ -817,6 +872,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
 
 static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
 {
+	struct xe_tile *tile = gt_to_tile(pc_to_gt(pc));
 	int ret;
 
 	lockdep_assert_held(&pc->freq_lock);
@@ -843,6 +899,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
 	if (pc_get_min_freq(pc) > pc->rp0_freq)
 		ret = pc_set_min_freq(pc, pc->rp0_freq);
 
+	if (XE_WA(tile->primary_gt, 14022085890))
+		ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc)));
+
 out:
 	return ret;
 }
@@ -868,29 +927,116 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
 	return ret;
 }
 
-static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
 {
+	struct xe_gt *gt = pc_to_gt(pc);
+
+	return XE_WA(gt, 22019338487) &&
+		pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
+}
+
+/**
+ * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
+ * @pc: the xe_guc_pc object
+ *
+ * As per the WA, reduce max GT frequency during L2 cache flush
+ */
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 max_freq;
+	int ret;
+
+	if (!needs_flush_freq_limit(pc))
+		return;
+
+	guard(mutex)(&pc->freq_lock);
+
+	ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq);
+	if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
+		ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
+		if (ret) {
+			xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
+				       BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+			return;
+		}
+
+		atomic_set(&pc->flush_freq_limit, 1);
+
+		/*
+		 * If user has previously changed max freq, stash that value to
+		 * restore later, otherwise use the current max. New user
+		 * requests wait on flush.
+		 */
+		if (pc->user_requested_max != 0)
+			pc->stashed_max_freq = pc->user_requested_max;
+		else
+			pc->stashed_max_freq = max_freq;
+	}
+
+	/*
+	 * Wait for actual freq to go below the flush cap: even if the previous
+	 * max was below cap, the current one might still be above it
+	 */
+	ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
+	if (ret)
+		xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
+			       BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+}
+
+/**
+ * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
+ * @pc: the xe_guc_pc object
+ *
+ * Retrieve the previous GT max frequency value.
+ */
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
 	int ret = 0;
 
-	if (XE_WA(pc_to_gt(pc), 22019338487)) {
-		/*
-		 * Get updated min/max and stash them.
-		 */
-		ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
-		if (!ret)
-			ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
-		if (ret)
-			return ret;
+	if (!needs_flush_freq_limit(pc))
+		return;
 
-		/*
-		 * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
-		 */
-		mutex_lock(&pc->freq_lock);
-		ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
-		if (!ret)
-			ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
-		mutex_unlock(&pc->freq_lock);
-	}
+	if (!atomic_read(&pc->flush_freq_limit))
+		return;
 
+	mutex_lock(&pc->freq_lock);
+
+	ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
+	if (ret)
+		xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
+			       pc->stashed_max_freq, ret);
+
+	atomic_set(&pc->flush_freq_limit, 0);
+	mutex_unlock(&pc->freq_lock);
+	wake_up_var(&pc->flush_freq_limit);
+}
+
+static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+{
+	int ret;
+
+	if (!XE_WA(pc_to_gt(pc), 22019338487))
+		return 0;
+
+	guard(mutex)(&pc->freq_lock);
+
+	/*
+	 * Get updated min/max and stash them.
+	 */
+	ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq);
+	if (!ret)
+		ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq);
+	if (ret)
+		return ret;
+
+	/*
+	 * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+	 */
+	ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
+	if (!ret)
+		ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
+
 	return ret;
 }
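The BMG flush workaround above coordinates three paths through one atomic flag: xe_guc_pc_apply_flush_freq_limit() sets pc->flush_freq_limit while the cap is in place, xe_guc_pc_set_max_freq() waits for it to clear via wait_for_flush_complete(), and xe_guc_pc_remove_flush_freq_limit() clears it and wakes waiters. Below is a minimal sketch of that wait_var_event_timeout()/wake_up_var() handshake, a kernel-style fragment with stand-in names rather than the driver's structures (not buildable on its own):

#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/wait_bit.h>

/* Stand-in for pc->flush_freq_limit */
static atomic_t flush_freq_limit;

/* Flush path: take the cap before flushing, release it afterwards */
static void limiter_apply(void)
{
	atomic_set(&flush_freq_limit, 1);	/* new user requests must now wait */
}

static void limiter_remove(void)
{
	atomic_set(&flush_freq_limit, 0);	/* clear the flag first ... */
	wake_up_var(&flush_freq_limit);		/* ... then wake any waiters */
}

/* User path: mirrors wait_for_flush_complete() and its ~30ms timeout */
static int user_set_max_freq(void)
{
	if (!wait_var_event_timeout(&flush_freq_limit,
				    !atomic_read(&flush_freq_limit),
				    msecs_to_jiffies(30)))
		return -ETIMEDOUT;

	/* safe to take the freq lock and apply the new max frequency here */
	return 0;
}

The ordering in limiter_remove() matters: a waiter woken by wake_up_var() re-evaluates the !atomic_read() condition, so the flag has to be cleared before the wake-up, which is the order xe_guc_pc_remove_flush_freq_limit() uses.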
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
 void xe_guc_pc_init_early(struct xe_guc_pc *pc);
 int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
 void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
 
 #endif /* _XE_GUC_PC_H_ */
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -15,6 +15,8 @@
 struct xe_guc_pc {
 	/** @bo: GGTT buffer object that is shared with GuC PC */
 	struct xe_bo *bo;
+	/** @flush_freq_limit: 1 when max freq changes are limited by driver */
+	atomic_t flush_freq_limit;
 	/** @rp0_freq: HW RP0 frequency - The Maximum one */
 	u32 rp0_freq;
 	/** @rpa_freq: HW RPa frequency - The Achievable one */
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -891,12 +891,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 	struct xe_exec_queue *q = ge->q;
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_gpu_scheduler *sched = &ge->sched;
-	bool wedged;
+	bool wedged = false;
 
 	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
 	trace_xe_exec_queue_lr_cleanup(q);
 
-	wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+	if (!exec_queue_killed(q))
+		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
 
 	/* Kill the run_job / process_msg entry points */
 	xe_sched_submission_stop(sched);
@@ -1070,7 +1071,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	int err = -ETIME;
 	pid_t pid = -1;
 	int i = 0;
-	bool wedged, skip_timeout_check;
+	bool wedged = false, skip_timeout_check;
 
 	/*
 	 * TDR has fired before free job worker. Common if exec queue
@@ -1116,7 +1117,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	 * doesn't work for SRIOV. For now assuming timeouts in wedged mode are
 	 * genuine timeouts.
 	 */
-	wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+	if (!exec_queue_killed(q))
+		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
 
 	/* Engine state now stable, disable scheduling to check timestamp */
 	if (!wedged && exec_queue_registered(q)) {
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -40,6 +40,7 @@
 
 #define LRC_PPHWSP_SIZE			SZ_4K
 #define LRC_INDIRECT_RING_STATE_SIZE	SZ_4K
+#define LRC_WA_BB_SIZE			SZ_4K
 
 static struct xe_device *
 lrc_to_xe(struct xe_lrc *lrc)
@@ -910,7 +911,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
 {
 	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
 	xe_bo_unpin_map_no_vm(lrc->bo);
-	xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
 }
 
+static size_t wa_bb_offset(struct xe_lrc *lrc)
+{
+	return lrc->bo->size - LRC_WA_BB_SIZE;
+}
+
 /*
@@ -943,15 +948,16 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
 #define CONTEXT_ACTIVE 1ULL
 static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
 {
+	const size_t max_size = LRC_WA_BB_SIZE;
 	u32 *cmd, *buf = NULL;
 
-	if (lrc->bb_per_ctx_bo->vmap.is_iomem) {
-		buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL);
+	if (lrc->bo->vmap.is_iomem) {
+		buf = kmalloc(max_size, GFP_KERNEL);
 		if (!buf)
 			return -ENOMEM;
 		cmd = buf;
 	} else {
-		cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
+		cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
 	}
 
 	*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
@@ -974,13 +980,14 @@ static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
 	*cmd++ = MI_BATCH_BUFFER_END;
 
 	if (buf) {
-		xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0,
-				 buf, (cmd - buf) * sizeof(*cmd));
+		xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
+				 wa_bb_offset(lrc), buf,
+				 (cmd - buf) * sizeof(*cmd));
 		kfree(buf);
 	}
 
-	xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
-			     xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
+	xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) +
+			     wa_bb_offset(lrc) + 1);
 
 	return 0;
 }
@@ -1018,20 +1025,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
 	 * via VM bind calls.
 	 */
-	lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size,
+	lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
+				       lrc_size + LRC_WA_BB_SIZE,
 				       ttm_bo_type_kernel,
 				       bo_flags);
 	if (IS_ERR(lrc->bo))
 		return PTR_ERR(lrc->bo);
 
-	lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
-						  ttm_bo_type_kernel,
-						  bo_flags);
-	if (IS_ERR(lrc->bb_per_ctx_bo)) {
-		err = PTR_ERR(lrc->bb_per_ctx_bo);
-		goto err_lrc_finish;
-	}
-
 	lrc->size = lrc_size;
 	lrc->ring.size = ring_size;
 	lrc->ring.tail = 0;
@@ -1819,7 +1819,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
 	snapshot->seqno = xe_lrc_seqno(lrc);
 	snapshot->lrc_bo = xe_bo_get(lrc->bo);
 	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
-	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
+	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
+			     LRC_WA_BB_SIZE;
 	snapshot->lrc_snapshot = NULL;
 	snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
 	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
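The xe_lrc.c changes above drop the separate bb_per_ctx_bo and carve the per-context WA batch buffer out of the tail of the main LRC BO. A small standalone illustration of the resulting layout arithmetic follows; only LRC_WA_BB_SIZE comes from the diff, the LRC size and GGTT address are made-up example values:

#include <stdint.h>
#include <stdio.h>

#define SZ_4K		0x1000u
#define LRC_WA_BB_SIZE	SZ_4K

int main(void)
{
	uint64_t lrc_size = 16 * SZ_4K;			/* example size only */
	uint64_t bo_size = lrc_size + LRC_WA_BB_SIZE;	/* one BO, WA BB appended */
	uint64_t ggtt_addr = 0x100000;			/* hypothetical GGTT address */

	/* wa_bb_offset(): the WA BB lives in the last LRC_WA_BB_SIZE bytes */
	uint64_t wa_bb_offset = bo_size - LRC_WA_BB_SIZE;

	/* Value the diff writes to CTX_BB_PER_CTX_PTR: WA BB address, low bit set */
	uint64_t bb_per_ctx_ptr = ggtt_addr + wa_bb_offset + 1;

	printf("wa_bb_offset=0x%llx CTX_BB_PER_CTX_PTR=0x%llx\n",
	       (unsigned long long)wa_bb_offset,
	       (unsigned long long)bb_per_ctx_ptr);
	return 0;
}

Because wa_bb_offset is 4 KiB aligned, the "+ 1" sets the same low bit that the replaced "| 1" form set on the old bb_per_ctx_bo address.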
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -53,9 +53,6 @@ struct xe_lrc {
 
 	/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
 	u64 ctx_timestamp;
-
-	/** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
-	struct xe_bo *bb_per_ctx_bo;
 };
 
 struct xe_lrc_snapshot;
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -82,7 +82,7 @@ struct xe_migrate {
  * of the instruction. Subtracting the instruction header (1 dword) and
  * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values.
  */
-#define MAX_PTE_PER_SDI 0x1FE
+#define MAX_PTE_PER_SDI 0x1FEU
 
 /**
  * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue.
@@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size)
 	u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE);
 
 	XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
 
 	/*
 	 * MI_STORE_DATA_IMM command is used to update page table. Each
-	 * instruction can update maximumly 0x1ff pte entries. To update
-	 * n (n <= 0x1ff) pte entries, we need:
-	 * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
-	 * 2 dword for the page table's physical location
-	 * 2*n dword for value of pte to fill (each pte entry is 2 dwords)
+	 * instruction can update maximumly MAX_PTE_PER_SDI pte entries. To
+	 * update n (n <= MAX_PTE_PER_SDI) pte entries, we need:
+	 *
+	 * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
+	 * - 2 dword for the page table's physical location
+	 * - 2*n dword for value of pte to fill (each pte entry is 2 dwords)
 	 */
-	num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff);
+	num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI);
 	num_dword += entries * 2;
+
 	return num_dword;
@@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
 
 	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
 	while (ptes) {
-		u32 chunk = min(0x1ffU, ptes);
+		u32 chunk = min(MAX_PTE_PER_SDI, ptes);
 
 		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
 		bb->cs[bb->len++] = pt_offset;
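The MAX_PTE_PER_SDI comment and pte_update_cmd_size() above encode a fixed dword budget per MI_STORE_DATA_IMM: 1 header dword, 2 address dwords and 2 dwords per PTE, with at most 0x1FE PTEs per instruction. A standalone check of that arithmetic (not driver code; it assumes XE_PAGE_SIZE is 4 KiB, as in the driver):

#include <stdint.h>
#include <stdio.h>

#define XE_PAGE_SIZE	4096u
#define MAX_PTE_PER_SDI	0x1FEu	/* 510 PTE entries (qwords) per MI_STORE_DATA_IMM */

static uint64_t div_round_up(uint64_t n, uint64_t d)
{
	return (n + d - 1) / d;
}

/* Same formula as pte_update_cmd_size(): (1 + 2) dwords of overhead per
 * chunk plus 2 dwords per PTE entry. */
static uint64_t pte_update_cmd_size(uint64_t size)
{
	uint64_t entries = div_round_up(size, XE_PAGE_SIZE);

	return (1 + 2) * div_round_up(entries, MAX_PTE_PER_SDI) + 2 * entries;
}

int main(void)
{
	/* Largest chunk: 1 + 2 + 2 * 0x1FE = 0x3FF dwords, just under the
	 * 0x400 dword limit of a single MI_STORE_DATA_IMM. */
	printf("max dwords per chunk: 0x%x\n", 1 + 2 + 2 * MAX_PTE_PER_SDI);

	/* A 2 MiB update: 512 PTEs -> 2 chunks -> 3*2 + 2*512 = 1030 dwords */
	printf("dwords for 2 MiB: %llu\n",
	       (unsigned long long)pte_update_cmd_size(2u * 1024 * 1024));
	return 0;
}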
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -21,7 +21,8 @@
 		GRAPHICS_VERSION_RANGE(1270, 1274)
 		MEDIA_VERSION(1300)
 		PLATFORM(DG2)
-14018094691	GRAPHICS_VERSION(2004)
+14018094691	GRAPHICS_VERSION_RANGE(2001, 2002)
+		GRAPHICS_VERSION(2004)
 14019882105	GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
 18024947630	GRAPHICS_VERSION(2001)
 		GRAPHICS_VERSION(2004)
@@ -59,3 +60,7 @@ no_media_l3	MEDIA_VERSION(3000)
 		MEDIA_VERSION_RANGE(1301, 3000)
 16026508708	GRAPHICS_VERSION_RANGE(1200, 3001)
 		MEDIA_VERSION_RANGE(1300, 3000)
+
+# SoC workaround - currently applies to all platforms with the following
+# primary GT GMDID
+14022085890	GRAPHICS_VERSION(2001)