drm/amdgpu: reduce queue timeout to 2 seconds v2

There has been multiple complains that 10 seconds are usually to long.

The original requirement for longer timeout came from compute tests on
AMDVLK, since that is no longer a topic reduce the timeout back to 2
seconds for all queues.

While at it also remove any special handling for compute queues under
SRIOV or pass through.

v2: fix checkpatch warning.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Christian König
2025-09-25 12:09:56 +02:00
committed by Alex Deucher
parent 861fc60b17
commit 1bea57ea75
2 changed files with 47 additions and 58 deletions

View File

@@ -4285,58 +4285,53 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
long timeout;
int ret = 0;
/*
* By default timeout for jobs is 10 sec
*/
adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000);
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
/* By default timeout for all queues is 2 sec */
adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
adev->video_timeout = msecs_to_jiffies(2000);
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
while ((timeout_setting = strsep(&input, ",")) &&
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
ret = kstrtol(timeout_setting, 0, &timeout);
if (ret)
return ret;
if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
return 0;
if (timeout == 0) {
index++;
continue;
} else if (timeout < 0) {
timeout = MAX_SCHEDULE_TIMEOUT;
dev_warn(adev->dev, "lockup timeout disabled");
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
} else {
timeout = msecs_to_jiffies(timeout);
}
while ((timeout_setting = strsep(&input, ",")) &&
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
ret = kstrtol(timeout_setting, 0, &timeout);
if (ret)
return ret;
switch (index++) {
case 0:
adev->gfx_timeout = timeout;
break;
case 1:
adev->compute_timeout = timeout;
break;
case 2:
adev->sdma_timeout = timeout;
break;
case 3:
adev->video_timeout = timeout;
break;
default:
break;
}
if (timeout == 0) {
index++;
continue;
} else if (timeout < 0) {
timeout = MAX_SCHEDULE_TIMEOUT;
dev_warn(adev->dev, "lockup timeout disabled");
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
} else {
timeout = msecs_to_jiffies(timeout);
}
/*
* There is only one value specified and
* it should apply to all non-compute jobs.
*/
if (index == 1) {
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
adev->compute_timeout = adev->gfx_timeout;
switch (index++) {
case 0:
adev->gfx_timeout = timeout;
break;
case 1:
adev->compute_timeout = timeout;
break;
case 2:
adev->sdma_timeout = timeout;
break;
case 3:
adev->video_timeout = timeout;
break;
default:
break;
}
}
/* When only one value specified apply it to all queues. */
if (index == 1)
adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
adev->video_timeout = timeout;
return ret;
}

View File

@@ -354,22 +354,16 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.
*
* The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
* multiple values specified. 0 and negative values are invalidated. They will be adjusted
* to the default timeout.
* The format can be [single value] for setting all timeouts at once or
* [GFX,Compute,SDMA,Video] to set individual timeouts.
* Negative values mean infinity.
*
* - With one value specified, the setting will apply to all non-compute jobs.
* - With multiple values specified, the first one will be for GFX.
* The second one is for Compute. The third and fourth ones are
* for SDMA and Video.
*
* By default(with no lockup_timeout settings), the timeout for all jobs is 10000.
* By default(with no lockup_timeout settings), the timeout for all queues is 2000.
*/
MODULE_PARM_DESC(lockup_timeout,
"GPU lockup timeout in ms (default: 10000 for all jobs. "
"0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
"for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video].");
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
"GPU lockup timeout in ms (default: 2000. 0: keep default value. negative: infinity timeout), format: [single value for all] or [GFX,Compute,SDMA,Video].");
module_param_string(lockup_timeout, amdgpu_lockup_timeout,
sizeof(amdgpu_lockup_timeout), 0444);
/**
* DOC: dpm (int)