drm/amdgpu: get RAS bad page address from MCA address

Derive the RAS bad page address from the MCA address instead of from
the physical address.

v2: add comment to make the code more readable

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Tao Zhou
2025-08-27 15:48:06 +08:00
committed by Alex Deucher
parent 50d9ebd66e
commit e84835940e
2 changed files with 14 additions and 5 deletions

View File

@@ -3014,8 +3014,13 @@ static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
addr_in.ma.err_addr = bps->address;
addr_in.ma.socket_id = socket;
addr_in.ma.ch_inst = bps->mem_channel;
/* tell RAS TA the node instance is not used */
addr_in.ma.node_inst = TA_RAS_INV_NODE;
if (!amdgpu_ras_smu_eeprom_supported(adev)) {
/* tell RAS TA the node instance is not used */
addr_in.ma.node_inst = TA_RAS_INV_NODE;
} else {
addr_in.ma.umc_inst = bps->mcumc_id;
addr_in.ma.node_inst = bps->cu;
}
if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
@@ -3162,7 +3167,11 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
} else {
save_nps = nps;
/* If pmfw manages the eeprom, save_nps is not stored on the eeprom
 * and we should always convert the mca address into a physical
 * address, so make save_nps different from nps.
 */
save_nps = nps + 1;
}
if (save_nps == nps) {

View File

@@ -1022,9 +1022,9 @@ int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
record[i - rec_idx].retired_page = 0x1ULL;
record[i - rec_idx].ts = ts;
record[i - rec_idx].err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
record[i - rec_idx].cu = 0;
adev->umc.ras->mca_ipid_parse(adev, ipid, NULL,
adev->umc.ras->mca_ipid_parse(adev, ipid,
(uint32_t *)&(record[i - rec_idx].cu),
(uint32_t *)&(record[i - rec_idx].mem_channel),
(uint32_t *)&(record[i - rec_idx].mcumc_id), NULL);
}