drm/xe/pf: Synchronize VF FLR between all GTs

The PF part of the VF FLR processing shall be done only after all GuCs
confirm that they have finished their part of the VF FLR processing;
otherwise the PF may start clearing the VF's GGTT while another GuC may
still be accessing it.

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
Link: https://lore.kernel.org/r/20250930233525.201263-7-michal.wajdeczko@intel.com
This commit is contained in:
Michal Wajdeczko
2025-10-01 01:35:24 +02:00
parent 03dc00c782
commit 2a8fcf7cc9
5 changed files with 88 additions and 1 deletions

View File

@@ -18,6 +18,7 @@
#include "xe_gt_sriov_printk.h"
#include "xe_guc_ct.h"
#include "xe_sriov.h"
#include "xe_sriov_pf_control.h"
#include "xe_sriov_pf_service.h"
#include "xe_tile.h"
@@ -170,6 +171,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
CASE2STR(FLR_SEND_START);
CASE2STR(FLR_WAIT_GUC);
CASE2STR(FLR_GUC_DONE);
CASE2STR(FLR_SYNC);
CASE2STR(FLR_RESET_CONFIG);
CASE2STR(FLR_RESET_DATA);
CASE2STR(FLR_RESET_MMIO);
@@ -940,6 +942,10 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
* : v : | |
* : FLR_GUC_DONE : | |
* : | : | |
* : | o--<--sync : | |
* : |/ / : | |
* : FLR_SYNC--o : | |
* : | : | |
* : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
* : | : | |
* : FLR_RESET_DATA : | |
@@ -1147,12 +1153,38 @@ static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
return true;
}
/*
 * Leave the FLR_SYNC checkpoint and continue with the FLR_RESET_CONFIG step.
 *
 * Returns true if the FLR_SYNC state was exited (and the next step was
 * entered), false if the state could not be exited, in which case nothing
 * else is done.
 */
static bool pf_exit_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
{
	bool exited = pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);

	/* only a successful exit from the checkpoint may start the next step */
	if (exited)
		pf_enter_vf_flr_reset_config(gt, vfid);

	return exited;
}
/*
 * Enter the FLR_SYNC checkpoint and try to synchronize the VF FLR with the
 * other GTs of the device.
 *
 * A failure to enter the state is reported as a state machine bug.
 * If the device-level synchronization returns an error, this GT is expected
 * to still be parked in FLR_SYNC; otherwise it is expected to have already
 * left that state.
 */
static void pf_enter_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
{
	int err;

	if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
		pf_enter_vf_state_machine_bug(gt, vfid);

	err = xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
	if (err >= 0) {
		/* checkpoint passed: the sync call must have moved us forward */
		xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint pass\n");
		pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
		return;
	}

	/* checkpoint not passed yet: we must still be waiting in FLR_SYNC */
	xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint %pe\n", ERR_PTR(err));
	pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
}
/*
 * Leave the FLR_GUC_DONE state and proceed to the FLR_SYNC checkpoint.
 *
 * The FLR_RESET_CONFIG step is deliberately not entered from here: it is
 * started by pf_exit_vf_flr_sync() once the checkpoint is passed, so the PF
 * part of the FLR does not begin before the synchronization took place.
 *
 * Returns true if the FLR_GUC_DONE state was exited, false otherwise.
 */
static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
	if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
		return false;

	pf_enter_vf_flr_sync(gt, vfid);
	return true;
}
@@ -1178,6 +1210,28 @@ int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
return 0;
}
/**
 * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint.
 * @gt: the &xe_gt
 * @vfid: the VF identifier
 * @sync: if true, the VF is allowed to exit the checkpoint
 *
 * When @sync is true and the VF is at the checkpoint, the checkpoint is
 * exited and the FLR continues with its next step. When @sync is false,
 * the state is only inspected.
 *
 * Return:
 * * 1 if the FLR checkpoint has been reached (or was just exited),
 * * 0 if there is no FLR in progress,
 * * -EBUSY if the FLR is in progress but is not at the checkpoint,
 * * -EIO if the FLR has failed.
 */
int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync)
{
	bool at_checkpoint;

	/* the exit attempt (if allowed) comes first, then the plain state check */
	at_checkpoint = (sync && pf_exit_vf_flr_sync(gt, vfid)) ||
			pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
	if (at_checkpoint)
		return 1;

	if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
		return -EBUSY;

	return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED) ? -EIO : 0;
}
/**
* xe_gt_sriov_pf_control_wait_flr() - Wait for a VF FLR to complete.
* @gt: the &xe_gt

View File

@@ -18,6 +18,7 @@ int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync);
int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid);
#ifdef CONFIG_PCI_IOV

View File

@@ -18,6 +18,7 @@
* @XE_GT_SRIOV_STATE_FLR_SEND_START: indicates that the PF wants to send a FLR START command.
* @XE_GT_SRIOV_STATE_FLR_WAIT_GUC: indicates that the PF awaits a response from the GuC.
* @XE_GT_SRIOV_STATE_FLR_GUC_DONE: indicates that the PF has received a response from the GuC.
* @XE_GT_SRIOV_STATE_FLR_SYNC: indicates that the PF is waiting to synchronize with other GuCs.
* @XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: indicates that the PF needs to clear VF's resources.
* @XE_GT_SRIOV_STATE_FLR_RESET_DATA: indicates that the PF needs to clear VF's data.
* @XE_GT_SRIOV_STATE_FLR_RESET_MMIO: indicates that the PF needs to reset VF's registers.
@@ -47,6 +48,7 @@ enum xe_gt_sriov_control_bits {
XE_GT_SRIOV_STATE_FLR_SEND_START,
XE_GT_SRIOV_STATE_FLR_WAIT_GUC,
XE_GT_SRIOV_STATE_FLR_GUC_DONE,
XE_GT_SRIOV_STATE_FLR_SYNC,
XE_GT_SRIOV_STATE_FLR_RESET_CONFIG,
XE_GT_SRIOV_STATE_FLR_RESET_DATA,
XE_GT_SRIOV_STATE_FLR_RESET_MMIO,

View File

@@ -120,3 +120,32 @@ int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid)
return result;
}
/*
 * Run one VF FLR checkpoint pass over every GT of the device.
 * Stops at, and returns, the first negative result; returns 0 otherwise.
 */
static int pf_sync_flr_on_all_gts(struct xe_device *xe, unsigned int vfid, bool sync)
{
	struct xe_gt *gt;
	unsigned int id;
	int err;

	for_each_gt(gt, xe, id) {
		err = xe_gt_sriov_pf_control_sync_flr(gt, vfid, sync);
		if (err < 0)
			return err;
	}

	return 0;
}

/**
 * xe_sriov_pf_control_sync_flr() - Synchronize a VF FLR between all GTs.
 * @xe: the &xe_device
 * @vfid: the VF identifier
 *
 * The first pass only checks every GT, without letting any of them exit
 * the checkpoint. Only if no GT reported an error does the second pass
 * allow each GT to exit the checkpoint.
 *
 * This function is for PF only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid)
{
	int err = pf_sync_flr_on_all_gts(xe, vfid, false);

	return err ? err : pf_sync_flr_on_all_gts(xe, vfid, true);
}

View File

@@ -12,5 +12,6 @@ int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid);
#endif