Merge tag 'drm-xe-fixes-2025-07-03' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

Driver Changes:
- Fix chunking the PTE updates and overflowing the maximum number of
  dwords with MI_STORE_DATA_IMM (Jia Yao)
- Move WA BB to the LRC BO to mitigate hangs on context switch (Matthew
  Brost)
- Fix frequency/flush WAs for BMG (Vinay / Lucas)
- Fix kconfig prompt title and description (Lucas)
- Do not require kunit (Harry Austen / Lucas)
- Extend 14018094691 WA to BMG (Daniele)
- Fix wedging the device on signal (Matthew Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/o5662wz6nrlf6xt5sjgxq5oe6qoujefzywuwblm3m626hreifv@foqayqydd6ig
Dave Airlie
2025-07-04 10:01:49 +10:00
11 changed files with 312 additions and 137 deletions

View File

@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_XE
tristate "Intel Xe Graphics"
depends on DRM && PCI && (m || (y && KUNIT=y))
tristate "Intel Xe2 Graphics"
depends on DRM && PCI
depends on KUNIT || !KUNIT
depends on INTEL_VSEC || !INTEL_VSEC
depends on X86_PLATFORM_DEVICES || !(X86 && ACPI)
select INTERVAL_TREE
@@ -46,7 +47,8 @@ config DRM_XE
select AUXILIARY_BUS
select HMM_MIRROR
help
Experimental driver for Intel Xe series GPUs
Driver for Intel Xe2 series GPUs and later. Experimental support
for Xe series is also available.
If "M" is selected, the module will be called xe.

View File

@@ -40,6 +40,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_guc_pc.h"
#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
#include "xe_irq.h"
@@ -986,38 +987,15 @@ void xe_device_wmb(struct xe_device *xe)
xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
}
/**
* xe_device_td_flush() - Flush transient L3 cache entries
* @xe: The device
*
* Display engine has direct access to memory and is never coherent with L3/L4
* caches (or CPU caches), however KMD is responsible for specifically flushing
* transient L3 GPU cache entries prior to the flip sequence to ensure scanout
* can happen from such a surface without seeing corruption.
*
* Display surfaces can be tagged as transient by mapping them using one of the
* various L3:XD PAT index modes on Xe2.
*
* Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
* at the end of each submission via PIPE_CONTROL for compute/render, since SA
* Media is not coherent with L3 and we want to support render-vs-media
* use cases. For other engines like copy/blt the HW internally forces uncached
* behaviour, hence why we can skip the TDF on such platforms.
/*
* Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt.
*/
void xe_device_td_flush(struct xe_device *xe)
static void tdf_request_sync(struct xe_device *xe)
{
struct xe_gt *gt;
unsigned int fw_ref;
struct xe_gt *gt;
u8 id;
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
return;
if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
xe_device_l2_flush(xe);
return;
}
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
continue;
@@ -1027,6 +1005,7 @@ void xe_device_td_flush(struct xe_device *xe)
return;
xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
/*
* FIXME: We can likely do better here with our choice of
* timeout. Currently we just assume the worst case, i.e. 150us,
@@ -1057,15 +1036,52 @@ void xe_device_l2_flush(struct xe_device *xe)
return;
spin_lock(&gt->global_invl_lock);
xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
/**
* xe_device_td_flush() - Flush transient L3 cache entries
* @xe: The device
*
* Display engine has direct access to memory and is never coherent with L3/L4
* caches (or CPU caches), however KMD is responsible for specifically flushing
* transient L3 GPU cache entries prior to the flip sequence to ensure scanout
* can happen from such a surface without seeing corruption.
*
* Display surfaces can be tagged as transient by mapping them using one of the
* various L3:XD PAT index modes on Xe2.
*
* Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
* at the end of each submission via PIPE_CONTROL for compute/render, since SA
* Media is not coherent with L3 and we want to support render-vs-media
* use cases. For other engines like copy/blt the HW internally forces uncached
* behaviour, hence why we can skip the TDF on such platforms.
*/
void xe_device_td_flush(struct xe_device *xe)
{
struct xe_gt *root_gt;
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
return;
root_gt = xe_root_mmio_gt(xe);
if (XE_WA(root_gt, 16023588340)) {
/* A transient flush is not sufficient: flush the L2 */
xe_device_l2_flush(xe);
} else {
xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
tdf_request_sync(xe);
xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
}
}
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?

View File

@@ -9,7 +9,7 @@
#include <drm/drm_drv.h>
#define DRIVER_NAME "xe"
#define DRIVER_DESC "Intel Xe Graphics"
#define DRIVER_DESC "Intel Xe2 Graphics"
/* Interface history:
*

View File

@@ -5,8 +5,11 @@
#include "xe_guc_pc.h"
#include <linux/cleanup.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/ktime.h>
#include <linux/wait_bit.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
@@ -51,9 +54,12 @@
#define LNL_MERT_FREQ_CAP 800
#define BMG_MERT_FREQ_CAP 2133
#define BMG_MIN_FREQ 1200
#define BMG_MERT_FLUSH_FREQ_CAP 2600
#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
#define SLPC_ACT_FREQ_TIMEOUT_MS 100
/**
* DOC: GuC Power Conservation (PC)
@@ -141,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
return -ETIMEDOUT;
}
static int wait_for_flush_complete(struct xe_guc_pc *pc)
{
const unsigned long timeout = msecs_to_jiffies(30);
if (!wait_var_event_timeout(&pc->flush_freq_limit,
!atomic_read(&pc->flush_freq_limit),
timeout))
return -ETIMEDOUT;
return 0;
}
static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
{
int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
int slept, wait = 10;
for (slept = 0; slept < timeout_us;) {
if (xe_guc_pc_get_act_freq(pc) <= freq)
return 0;
usleep_range(wait, wait << 1);
slept += wait;
wait <<= 1;
if (slept + wait > timeout_us)
wait = timeout_us - slept;
}
return -ETIMEDOUT;
}
static int pc_action_reset(struct xe_guc_pc *pc)
{
struct xe_guc_ct *ct = pc_to_ct(pc);
@@ -553,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
return pc->rpn_freq;
}
static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq)
{
int ret;
lockdep_assert_held(&pc->freq_lock);
/* Might be in the middle of a gt reset */
if (!pc->freq_ready)
return -EAGAIN;
ret = pc_action_query_task_state(pc);
if (ret)
return ret;
*freq = pc_get_min_freq(pc);
return 0;
}
/**
* xe_guc_pc_get_min_freq - Get the min operational frequency
* @pc: The GuC PC
@@ -562,27 +617,29 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
* -EAGAIN if GuC PC not ready (likely in middle of a reset).
*/
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
{
guard(mutex)(&pc->freq_lock);
return xe_guc_pc_get_min_freq_locked(pc, freq);
}
static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq)
{
int ret;
xe_device_assert_mem_access(pc_to_xe(pc));
lockdep_assert_held(&pc->freq_lock);
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
ret = -EAGAIN;
goto out;
}
/* Might be in the middle of a gt reset */
if (!pc->freq_ready)
return -EAGAIN;
ret = pc_action_query_task_state(pc);
ret = pc_set_min_freq(pc, freq);
if (ret)
goto out;
return ret;
*freq = pc_get_min_freq(pc);
pc->user_requested_min = freq;
out:
mutex_unlock(&pc->freq_lock);
return ret;
return 0;
}
/**
@@ -595,25 +652,29 @@ out:
* -EINVAL if value out of bounds.
*/
int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
{
guard(mutex)(&pc->freq_lock);
return xe_guc_pc_set_min_freq_locked(pc, freq);
}
static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq)
{
int ret;
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
ret = -EAGAIN;
goto out;
}
lockdep_assert_held(&pc->freq_lock);
ret = pc_set_min_freq(pc, freq);
/* Might be in the middle of a gt reset */
if (!pc->freq_ready)
return -EAGAIN;
ret = pc_action_query_task_state(pc);
if (ret)
goto out;
return ret;
pc->user_requested_min = freq;
*freq = pc_get_max_freq(pc);
out:
mutex_unlock(&pc->freq_lock);
return ret;
return 0;
}
/**
@@ -625,25 +686,29 @@ out:
* -EAGAIN if GuC PC not ready (likely in middle of a reset).
*/
int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
{
guard(mutex)(&pc->freq_lock);
return xe_guc_pc_get_max_freq_locked(pc, freq);
}
static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq)
{
int ret;
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
ret = -EAGAIN;
goto out;
}
lockdep_assert_held(&pc->freq_lock);
ret = pc_action_query_task_state(pc);
/* Might be in the middle of a gt reset */
if (!pc->freq_ready)
return -EAGAIN;
ret = pc_set_max_freq(pc, freq);
if (ret)
goto out;
return ret;
*freq = pc_get_max_freq(pc);
pc->user_requested_max = freq;
out:
mutex_unlock(&pc->freq_lock);
return ret;
return 0;
}
/**
@@ -657,24 +722,14 @@ out:
*/
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
int ret;
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
ret = -EAGAIN;
goto out;
if (XE_WA(pc_to_gt(pc), 22019338487)) {
if (wait_for_flush_complete(pc) != 0)
return -EAGAIN;
}
ret = pc_set_max_freq(pc, freq);
if (ret)
goto out;
guard(mutex)(&pc->freq_lock);
pc->user_requested_max = freq;
out:
mutex_unlock(&pc->freq_lock);
return ret;
return xe_guc_pc_set_max_freq_locked(pc, freq);
}
/**
@@ -817,6 +872,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
{
struct xe_tile *tile = gt_to_tile(pc_to_gt(pc));
int ret;
lockdep_assert_held(&pc->freq_lock);
@@ -843,6 +899,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
if (pc_get_min_freq(pc) > pc->rp0_freq)
ret = pc_set_min_freq(pc, pc->rp0_freq);
if (XE_WA(tile->primary_gt, 14022085890))
ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc)));
out:
return ret;
}
@@ -868,29 +927,116 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
return ret;
}
static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
return XE_WA(gt, 22019338487) &&
pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
}
/**
* xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
* @pc: the xe_guc_pc object
*
* As per the WA, reduce max GT frequency during L2 cache flush
*/
void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
u32 max_freq;
int ret;
if (!needs_flush_freq_limit(pc))
return;
guard(mutex)(&pc->freq_lock);
ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq);
if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
if (ret) {
xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
return;
}
atomic_set(&pc->flush_freq_limit, 1);
/*
* If user has previously changed max freq, stash that value to
* restore later, otherwise use the current max. New user
* requests wait on flush.
*/
if (pc->user_requested_max != 0)
pc->stashed_max_freq = pc->user_requested_max;
else
pc->stashed_max_freq = max_freq;
}
/*
* Wait for actual freq to go below the flush cap: even if the previous
* max was below cap, the current one might still be above it
*/
ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
if (ret)
xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
}
/**
* xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
* @pc: the xe_guc_pc object
*
* Retrieve the previous GT max frequency value.
*/
void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
int ret = 0;
if (XE_WA(pc_to_gt(pc), 22019338487)) {
/*
* Get updated min/max and stash them.
*/
ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
if (!ret)
ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
if (ret)
return ret;
if (!needs_flush_freq_limit(pc))
return;
/*
* Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
*/
mutex_lock(&pc->freq_lock);
ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
if (!ret)
ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
mutex_unlock(&pc->freq_lock);
}
if (!atomic_read(&pc->flush_freq_limit))
return;
mutex_lock(&pc->freq_lock);
ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
if (ret)
xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
pc->stashed_max_freq, ret);
atomic_set(&pc->flush_freq_limit, 0);
mutex_unlock(&pc->freq_lock);
wake_up_var(&pc->flush_freq_limit);
}
static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
{
int ret;
if (!XE_WA(pc_to_gt(pc), 22019338487))
return 0;
guard(mutex)(&pc->freq_lock);
/*
* Get updated min/max and stash them.
*/
ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq);
if (!ret)
ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq);
if (ret)
return ret;
/*
* Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
*/
ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
if (!ret)
ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
return ret;
}
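
For readers tracing the new frequency/flush interaction, the following is a rough sketch (not part of the patch) of how the helpers added in this file are meant to pair up around a transient flush; it only uses names introduced in the hunks above and elides error handling:

/*
 * Flush side (driven from xe_device_td_flush() when needs_flush_freq_limit()
 * is true, i.e. WA 22019338487 with rp0 above BMG_MERT_FLUSH_FREQ_CAP):
 *
 *   xe_guc_pc_apply_flush_freq_limit(pc);
 *       -> pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP)  // only if current max is higher
 *       -> atomic_set(&pc->flush_freq_limit, 1)          // stall user max-freq changes
 *       -> wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP)
 *
 *   ... TDF / L2 flush runs with the cap in place ...
 *
 *   xe_guc_pc_remove_flush_freq_limit(pc);
 *       -> pc_set_max_freq(pc, pc->stashed_max_freq)     // restore previous max
 *       -> atomic_set(&pc->flush_freq_limit, 0)
 *       -> wake_up_var(&pc->flush_freq_limit)            // release any waiters
 *
 * User side: a concurrent xe_guc_pc_set_max_freq() first sleeps in
 * wait_for_flush_complete() (wait_var_event_timeout() on &pc->flush_freq_limit,
 * roughly 30 ms) until the flag clears, then takes guard(mutex)(&pc->freq_lock)
 * (the cleanup.h guard drops the lock automatically at scope exit) and applies
 * the request via xe_guc_pc_set_max_freq_locked().
 */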

View File

@@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
void xe_guc_pc_init_early(struct xe_guc_pc *pc);
int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
#endif /* _XE_GUC_PC_H_ */

View File

@@ -15,6 +15,8 @@
struct xe_guc_pc {
/** @bo: GGTT buffer object that is shared with GuC PC */
struct xe_bo *bo;
/** @flush_freq_limit: 1 when max freq changes are limited by driver */
atomic_t flush_freq_limit;
/** @rp0_freq: HW RP0 frequency - The Maximum one */
u32 rp0_freq;
/** @rpa_freq: HW RPa frequency - The Achievable one */

View File

@@ -891,12 +891,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_gpu_scheduler *sched = &ge->sched;
bool wedged;
bool wedged = false;
xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
trace_xe_exec_queue_lr_cleanup(q);
wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
if (!exec_queue_killed(q))
wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
/* Kill the run_job / process_msg entry points */
xe_sched_submission_stop(sched);
@@ -1070,7 +1071,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
int err = -ETIME;
pid_t pid = -1;
int i = 0;
bool wedged, skip_timeout_check;
bool wedged = false, skip_timeout_check;
/*
* TDR has fired before free job worker. Common if exec queue
@@ -1116,7 +1117,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
* doesn't work for SRIOV. For now assuming timeouts in wedged mode are
* genuine timeouts.
*/
wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
if (!exec_queue_killed(q))
wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
/* Engine state now stable, disable scheduling to check timestamp */
if (!wedged && exec_queue_registered(q)) {

View File

@@ -40,6 +40,7 @@
#define LRC_PPHWSP_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
#define LRC_WA_BB_SIZE SZ_4K
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
@@ -910,7 +911,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
{
xe_hw_fence_ctx_finish(&lrc->fence_ctx);
xe_bo_unpin_map_no_vm(lrc->bo);
xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
}
static size_t wa_bb_offset(struct xe_lrc *lrc)
{
return lrc->bo->size - LRC_WA_BB_SIZE;
}
/*
@@ -943,15 +948,16 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
#define CONTEXT_ACTIVE 1ULL
static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
{
const size_t max_size = LRC_WA_BB_SIZE;
u32 *cmd, *buf = NULL;
if (lrc->bb_per_ctx_bo->vmap.is_iomem) {
buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL);
if (lrc->bo->vmap.is_iomem) {
buf = kmalloc(max_size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
cmd = buf;
} else {
cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
}
*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
@@ -974,13 +980,14 @@ static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
*cmd++ = MI_BATCH_BUFFER_END;
if (buf) {
xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0,
buf, (cmd - buf) * sizeof(*cmd));
xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
wa_bb_offset(lrc), buf,
(cmd - buf) * sizeof(*cmd));
kfree(buf);
}
xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) +
wa_bb_offset(lrc) + 1);
return 0;
}
@@ -1018,20 +1025,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size,
lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
lrc_size + LRC_WA_BB_SIZE,
ttm_bo_type_kernel,
bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
ttm_bo_type_kernel,
bo_flags);
if (IS_ERR(lrc->bb_per_ctx_bo)) {
err = PTR_ERR(lrc->bb_per_ctx_bo);
goto err_lrc_finish;
}
lrc->size = lrc_size;
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
@@ -1819,7 +1819,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->seqno = xe_lrc_seqno(lrc);
snapshot->lrc_bo = xe_bo_get(lrc->bo);
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
LRC_WA_BB_SIZE;
snapshot->lrc_snapshot = NULL;
snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
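
To make the buffer reshuffle above easier to follow, here is an illustrative sketch (not part of the patch) of the single-BO layout that replaces the separate bb_per_ctx_bo, using only sizes and helpers from the hunks above:

/*
 * lrc->bo is now allocated as lrc_size + LRC_WA_BB_SIZE, with the per-context
 * WA batch buffer carved out of its tail:
 *
 *   0                                lrc_size        lrc_size + LRC_WA_BB_SIZE
 *   |------------ LRC state ------------|-- per-ctx WA BB (4K) --|
 *                                        ^
 *                                        wa_bb_offset(lrc) == lrc->bo->size - LRC_WA_BB_SIZE
 *
 * CTX_BB_PER_CTX_PTR is programmed as
 *   xe_bo_ggtt_addr(lrc->bo) + wa_bb_offset(lrc) + 1
 * where the low bit presumably plays the same enable role as the old "| 1" on
 * the separate bb_per_ctx_bo. Snapshots subtract LRC_WA_BB_SIZE so they keep
 * covering only the LRC state proper.
 */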

View File

@@ -53,9 +53,6 @@ struct xe_lrc {
/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
u64 ctx_timestamp;
/** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
struct xe_bo *bb_per_ctx_bo;
};
struct xe_lrc_snapshot;

View File

@@ -82,7 +82,7 @@ struct xe_migrate {
* of the instruction. Subtracting the instruction header (1 dword) and
* address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values.
*/
#define MAX_PTE_PER_SDI 0x1FE
#define MAX_PTE_PER_SDI 0x1FEU
/**
* xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue.
@@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size)
u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE);
XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
/*
* MI_STORE_DATA_IMM command is used to update page table. Each
* instruction can update at most 0x1ff pte entries. To update
* n (n <= 0x1ff) pte entries, we need:
* 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
* 2 dword for the page table's physical location
* 2*n dword for value of pte to fill (each pte entry is 2 dwords)
* instruction can update at most MAX_PTE_PER_SDI pte entries. To
* update n (n <= MAX_PTE_PER_SDI) pte entries, we need:
*
* - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
* - 2 dword for the page table's physical location
* - 2*n dword for value of pte to fill (each pte entry is 2 dwords)
*/
num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff);
num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI);
num_dword += entries * 2;
return num_dword;
@@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
while (ptes) {
u32 chunk = min(0x1ffU, ptes);
u32 chunk = min(MAX_PTE_PER_SDI, ptes);
bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = pt_offset;
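
As a quick sanity check of the sizing comment above, here is a worked example (not part of the patch) for pte_update_cmd_size(), assuming the usual 4 KiB XE_PAGE_SIZE and an arbitrary 4 MiB transfer:

/*
 * size      = 4 MiB
 * entries   = DIV_U64_ROUND_UP(4 MiB, 4 KiB)           = 1024 PTEs
 * chunks    = DIV_U64_ROUND_UP(1024, MAX_PTE_PER_SDI)  = 3    (0x1FE == 510)
 * num_dword = (1 + 2) * 3 + 2 * 1024                   = 2057 dwords
 *
 * i.e. each MI_STORE_DATA_IMM covers at most 510 PTEs, so a large update is
 * emitted as several chunked instructions, matching the
 * min(MAX_PTE_PER_SDI, ptes) loop in build_pt_update_batch_sram(), rather
 * than one instruction that overflows the dword limit.
 */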

View File

@@ -21,7 +21,8 @@
GRAPHICS_VERSION_RANGE(1270, 1274)
MEDIA_VERSION(1300)
PLATFORM(DG2)
14018094691 GRAPHICS_VERSION(2004)
14018094691 GRAPHICS_VERSION_RANGE(2001, 2002)
GRAPHICS_VERSION(2004)
14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
18024947630 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
@@ -59,3 +60,7 @@ no_media_l3 MEDIA_VERSION(3000)
MEDIA_VERSION_RANGE(1301, 3000)
16026508708 GRAPHICS_VERSION_RANGE(1200, 3001)
MEDIA_VERSION_RANGE(1300, 3000)
# SoC workaround - currently applies to all platforms with the following
# primary GT GMDID
14022085890 GRAPHICS_VERSION(2001)