drm/amdgpu: Intercept ras interrupts to ras module

When uniras is enabled, redirect RAS interrupts to the RAS manager module instead of the legacy handlers.

V2:
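  Rename amdgpu_enable_unified_ras() to amdgpu_enable_uniras() and
  amdgpu_unified_ras_enabled() to amdgpu_uniras_enabled() in the ras module.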

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
YiPeng Chai
2025-09-28 14:25:27 +08:00
committed by Alex Deucher
parent 4b6ec94fda
commit 3d72d2e5f4
4 changed files with 35 additions and 4 deletions

View File

@@ -36,6 +36,7 @@
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu_reset.h"
#include "amdgpu_ras_mgr.h"
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
@@ -746,6 +747,20 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad
enum amdgpu_ras_block block, uint16_t pasid,
pasid_notify pasid_fn, void *data, uint32_t reset)
{
/*
 * Forward a PASID poison-consumption event to one of two handlers.
 * If amdgpu_uniras_enabled() returns true, the arguments are packed into
 * a struct ras_ih_info and passed to the RAS manager; otherwise they are
 * passed unchanged to amdgpu_umc_pasid_poison_handler().
 */
if (amdgpu_uniras_enabled(adev)) {
struct ras_ih_info ih_info;
/* Zero the whole struct so every field not assigned below is 0/NULL. */
memset(&ih_info, 0, sizeof(ih_info));
/* Copy each caller argument into the matching ras_ih_info field. */
ih_info.block = block;
ih_info.pasid = pasid;
ih_info.reset = reset;
ih_info.pasid_fn = pasid_fn;
ih_info.data = data;
/*
 * The handler is declared to return int, but this function is void,
 * so its result is not propagated to the caller.
 */
amdgpu_ras_mgr_handle_consumer_interrupt(adev, &ih_info);
return;
}
/* Reached only when amdgpu_uniras_enabled() returned false. */
amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
}

View File

@@ -41,6 +41,7 @@
#include "atom.h"
#include "amdgpu_reset.h"
#include "amdgpu_psp.h"
#include "amdgpu_ras_mgr.h"
#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -2241,6 +2242,11 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
return;
if (amdgpu_uniras_enabled(adev)) {
amdgpu_ras_mgr_handle_fatal_interrupt(adev, NULL);
return;
}
if (adev->nbio.ras &&
adev->nbio.ras->handle_ras_controller_intr_no_bifring)
adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
@@ -2411,6 +2417,16 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_manager *obj;
struct ras_ih_data *data;
if (amdgpu_uniras_enabled(adev)) {
struct ras_ih_info ih_info;
memset(&ih_info, 0, sizeof(ih_info));
ih_info.block = info->head.block;
memcpy(&ih_info.iv_entry, info->entry, sizeof(struct amdgpu_iv_entry));
return amdgpu_ras_mgr_handle_controller_interrupt(adev, &ih_info);
}
obj = amdgpu_ras_find_obj(adev, &info->head);
if (!obj)
return -EINVAL;

View File

@@ -381,7 +381,7 @@ static const struct amd_ip_funcs __maybe_unused ras_v1_0_ip_funcs = {
.hw_fini = amdgpu_ras_mgr_hw_fini,
};
int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable)
int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable)
{
struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev);
@@ -395,7 +395,7 @@ int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable)
return ras_core_set_status(ras_mgr->ras_core, enable);
}
bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev)
bool amdgpu_uniras_enabled(struct amdgpu_device *adev)
{
struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev);

View File

@@ -56,8 +56,8 @@ struct amdgpu_ras_mgr {
struct amdgpu_ras_mgr *amdgpu_ras_mgr_get_context(
struct amdgpu_device *adev);
int amdgpu_enable_unified_ras(struct amdgpu_device *adev, bool enable);
bool amdgpu_unified_ras_enabled(struct amdgpu_device *adev);
int amdgpu_enable_uniras(struct amdgpu_device *adev, bool enable);
bool amdgpu_uniras_enabled(struct amdgpu_device *adev);
int amdgpu_ras_mgr_handle_fatal_interrupt(struct amdgpu_device *adev, void *data);
int amdgpu_ras_mgr_handle_controller_interrupt(struct amdgpu_device *adev, void *data);
int amdgpu_ras_mgr_handle_consumer_interrupt(struct amdgpu_device *adev, void *data);