drm/amd/ras: Add CPER ring read for uniras

Read CPER raw data from debugfs node "/sys/kernel/debug/dri/*/
amdgpu_ring_cper".

Signed-off-by: Xiang Liu <xiang.liu@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Xiang Liu
2025-10-22 15:11:42 +08:00
committed by Alex Deucher
parent ad0a48e531
commit 527e3d4033
2 changed files with 61 additions and 1 deletions

View File

@@ -770,7 +770,8 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
"Saved bad pages %d reaches threshold value %d\n",
control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
if (adev->cper.enabled && !amdgpu_uniras_enabled(adev) &&
amdgpu_cper_generate_bp_threshold_record(adev))
dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
if ((amdgpu_bad_page_threshold != -1) &&

View File

@@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_ras_mgr.h"
#include "atom.h"
/*
@@ -495,6 +496,61 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
*/
#if defined(CONFIG_DEBUG_FS)
/**
 * amdgpu_ras_cper_debugfs_read - debugfs read of CPER data via the RAS manager
 * @f: debugfs file; the inode's i_private is the CPER ring
 * @buf: user buffer to fill
 * @size: capacity of @buf in bytes
 * @offset: file position, counted in CPER records already returned (it is
 *          advanced by the number of records read, not by bytes)
 *
 * The first read (*@offset == 0) is prefixed with a 12-byte zero-filled ring
 * header; the records themselves are fetched with RAS_CMD__GET_CPER_SNAPSHOT
 * followed by RAS_CMD__GET_CPER_RECORD.
 *
 * Return: number of bytes produced (header included on the first read),
 * 0 when the snapshot reports no records, -ENOMEM on allocation failure,
 * -EINVAL when the first read cannot hold the header, -EFAULT when the
 * header cannot be copied to user space, or the error returned by the
 * RAS command handler.
 */
static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
					    size_t size, loff_t *offset)
{
	const uint8_t ring_header_size = 12;
	struct amdgpu_ring *ring = file_inode(f)->i_private;
	struct ras_cmd_cper_snapshot_req *snapshot_req __free(kfree) =
		kzalloc(sizeof(*snapshot_req), GFP_KERNEL);
	struct ras_cmd_cper_snapshot_rsp *snapshot_rsp __free(kfree) =
		kzalloc(sizeof(*snapshot_rsp), GFP_KERNEL);
	struct ras_cmd_cper_record_req *record_req __free(kfree) =
		kzalloc(sizeof(*record_req), GFP_KERNEL);
	struct ras_cmd_cper_record_rsp *record_rsp __free(kfree) =
		kzalloc(sizeof(*record_rsp), GFP_KERNEL);
	/* Zero-filled and never written: the header is emitted as all zeroes. */
	uint8_t *ring_header __free(kfree) =
		kzalloc(ring_header_size, GFP_KERNEL);
	uint32_t total_cper_num;
	uint64_t start_cper_id;
	int r;

	if (!snapshot_req || !snapshot_rsp || !record_req || !record_rsp ||
	    !ring_header)
		return -ENOMEM;

	if (!(*offset)) {
		/*
		 * The first read must have room for the ring header; copying
		 * it unconditionally would write past a shorter user buffer.
		 */
		if (size < ring_header_size)
			return -EINVAL;

		if (copy_to_user(buf, ring_header, ring_header_size))
			return -EFAULT;

		/* Records go after the header, in the space that remains. */
		buf += ring_header_size;
		size -= ring_header_size;
	}

	r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,
					  RAS_CMD__GET_CPER_SNAPSHOT,
					  snapshot_req, sizeof(*snapshot_req),
					  snapshot_rsp, sizeof(*snapshot_rsp));
	/* Error, or nothing to read (r == 0 signals EOF to the caller). */
	if (r || !snapshot_rsp->total_cper_num)
		return r;

	start_cper_id = snapshot_rsp->start_cper_id;
	total_cper_num = snapshot_rsp->total_cper_num;

	/*
	 * NOTE(review): the user pointer is handed to the RAS command as an
	 * integer; presumably the handler performs the copy to user space
	 * itself and honours buf_size - confirm against the handler.
	 */
	record_req->buf_ptr = (uint64_t)(uintptr_t)buf;
	record_req->buf_size = size;
	/* *offset counts records already delivered, so resume after them. */
	record_req->cper_start_id = start_cper_id + *offset;
	record_req->cper_num = total_cper_num;

	r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev, RAS_CMD__GET_CPER_RECORD,
					  record_req, sizeof(*record_req),
					  record_rsp, sizeof(*record_rsp));
	if (r)
		return r;

	/* Only the first read carries the header bytes. */
	r = *offset ? record_rsp->real_data_size :
		      record_rsp->real_data_size + ring_header_size;
	(*offset) += record_rsp->real_cper_num;

	return r;
}
/* Layout of file is 12 bytes consisting of
* - rptr
* - wptr
@@ -511,6 +567,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
loff_t i;
int r;
if (ring->funcs->type == AMDGPU_RING_TYPE_CPER && amdgpu_uniras_enabled(ring->adev))
return amdgpu_ras_cper_debugfs_read(f, buf, size, pos);
if (*pos & 3 || size & 3)
return -EINVAL;