mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
drm/amd: Unwind for failed device suspend
If device suspend has failed, add a recovery flow that will attempt to unwind the suspend and get things back up and running. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4627 Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
committed by
Alex Deucher
parent
1d61121872
commit
72b0b75d60
@@ -5273,7 +5273,7 @@ void amdgpu_device_complete(struct drm_device *dev)
|
||||
int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
|
||||
{
|
||||
struct amdgpu_device *adev = drm_to_adev(dev);
|
||||
int r = 0;
|
||||
int r, rec;
|
||||
|
||||
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
|
||||
return 0;
|
||||
@@ -5289,8 +5289,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3))
|
||||
dev_warn(adev->dev, "smart shift update failed\n");
|
||||
r = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3);
|
||||
if (r)
|
||||
goto unwind_sriov;
|
||||
|
||||
if (notify_clients)
|
||||
drm_client_dev_suspend(adev_to_drm(adev));
|
||||
@@ -5301,16 +5302,16 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
|
||||
|
||||
r = amdgpu_device_ip_suspend_phase1(adev);
|
||||
if (r)
|
||||
return r;
|
||||
goto unwind_smartshift;
|
||||
|
||||
amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
|
||||
r = amdgpu_userq_suspend(adev);
|
||||
if (r)
|
||||
return r;
|
||||
goto unwind_ip_phase1;
|
||||
|
||||
r = amdgpu_device_evict_resources(adev);
|
||||
if (r)
|
||||
return r;
|
||||
goto unwind_userq;
|
||||
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, false);
|
||||
|
||||
@@ -5318,12 +5319,62 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
|
||||
|
||||
r = amdgpu_device_ip_suspend_phase2(adev);
|
||||
if (r)
|
||||
return r;
|
||||
goto unwind_evict;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
amdgpu_virt_release_full_gpu(adev, false);
|
||||
|
||||
return 0;
|
||||
|
||||
unwind_evict:
|
||||
if (adev->mman.buffer_funcs_ring->sched.ready)
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, true);
|
||||
amdgpu_fence_driver_hw_init(adev);
|
||||
|
||||
unwind_userq:
|
||||
rec = amdgpu_userq_resume(adev);
|
||||
if (rec) {
|
||||
dev_warn(adev->dev, "failed to re-initialize user queues: %d\n", rec);
|
||||
return r;
|
||||
}
|
||||
rec = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
|
||||
if (rec) {
|
||||
dev_warn(adev->dev, "failed to re-initialize kfd: %d\n", rec);
|
||||
return r;
|
||||
}
|
||||
|
||||
unwind_ip_phase1:
|
||||
/* suspend phase 1 = resume phase 3 */
|
||||
rec = amdgpu_device_ip_resume_phase3(adev);
|
||||
if (rec) {
|
||||
dev_warn(adev->dev, "failed to re-initialize IPs phase1: %d\n", rec);
|
||||
return r;
|
||||
}
|
||||
|
||||
unwind_smartshift:
|
||||
rec = amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0);
|
||||
if (rec) {
|
||||
dev_warn(adev->dev, "failed to re-update smart shift: %d\n", rec);
|
||||
return r;
|
||||
}
|
||||
|
||||
if (notify_clients)
|
||||
drm_client_dev_resume(adev_to_drm(adev));
|
||||
|
||||
amdgpu_ras_resume(adev);
|
||||
|
||||
unwind_sriov:
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
rec = amdgpu_virt_request_full_gpu(adev, true);
|
||||
if (rec) {
|
||||
dev_warn(adev->dev, "failed to reinitialize sriov: %d\n", rec);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
adev->in_suspend = adev->in_s0ix = adev->in_s3 = false;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline int amdgpu_virt_resume(struct amdgpu_device *adev)
|
||||
|
||||
Reference in New Issue
Block a user