mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
drm/amdgpu: add RAS bad page threshold handling for PMFW manages eeprom
Check if bad page threshold is reached and take actions accordingly.

v2: remove rma message sent to smu when pmfw manages eeprom.
v3: add null pointer check for con.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -903,6 +903,33 @@ int amdgpu_ras_eeprom_update_record_num(struct amdgpu_ras_eeprom_control *contro
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int amdgpu_ras_smu_eeprom_append(struct amdgpu_ras_eeprom_control *control)
|
||||
{
|
||||
struct amdgpu_device *adev = to_amdgpu_device(control);
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (!amdgpu_ras_smu_eeprom_supported(adev) || !con)
|
||||
return 0;
|
||||
|
||||
control->ras_num_bad_pages = con->bad_page_num;
|
||||
|
||||
if (amdgpu_bad_page_threshold != 0 &&
|
||||
control->ras_num_bad_pages > con->bad_page_cnt_threshold) {
|
||||
dev_warn(adev->dev,
|
||||
"Saved bad pages %d reaches threshold value %d\n",
|
||||
control->ras_num_bad_pages, con->bad_page_cnt_threshold);
|
||||
|
||||
if (adev->cper.enabled && amdgpu_cper_generate_bp_threshold_record(adev))
|
||||
dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
|
||||
|
||||
if ((amdgpu_bad_page_threshold != -1) &&
|
||||
(amdgpu_bad_page_threshold != -2))
|
||||
con->is_rma = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table
|
||||
* @control: pointer to control structure
|
||||
@@ -921,17 +948,14 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
|
||||
const u32 num)
|
||||
{
|
||||
struct amdgpu_device *adev = to_amdgpu_device(control);
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
int res, i;
|
||||
uint64_t nps = AMDGPU_NPS1_PARTITION_MODE;
|
||||
|
||||
if (!__is_ras_eeprom_supported(adev) || !con)
|
||||
if (!__is_ras_eeprom_supported(adev))
|
||||
return 0;
|
||||
|
||||
if (amdgpu_ras_smu_eeprom_supported(adev)) {
|
||||
control->ras_num_bad_pages = con->bad_page_num;
|
||||
return 0;
|
||||
}
|
||||
if (amdgpu_ras_smu_eeprom_supported(adev))
|
||||
return amdgpu_ras_smu_eeprom_append(control);
|
||||
|
||||
if (num == 0) {
|
||||
dev_err(adev->dev, "will not append 0 records\n");
|
||||
|
||||
Reference in New Issue
Block a user