drm/amdgpu: reduce queue timeout to 2 seconds v2

There have been multiple complaints that 10 seconds is usually too long.

The original requirement for a longer timeout came from compute tests
on AMDVLK. Since that is no longer a concern, reduce the timeout back
to 2 seconds for all queues.

While at it also remove any special handling for compute queues under
SRIOV or pass through.

v2: fix checkpatch warning.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Christian König
2025-09-25 12:09:56 +02:00
committed by Alex Deucher
parent 861fc60b17
commit 1bea57ea75
2 changed files with 47 additions and 58 deletions

View File

@@ -4285,58 +4285,53 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
long timeout; long timeout;
int ret = 0; int ret = 0;
/* /* By default timeout for all queues is 2 sec */
* By default timeout for jobs is 10 sec adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
*/ adev->video_timeout = msecs_to_jiffies(2000);
adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000);
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH))
while ((timeout_setting = strsep(&input, ",")) && return 0;
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
ret = kstrtol(timeout_setting, 0, &timeout);
if (ret)
return ret;
if (timeout == 0) { while ((timeout_setting = strsep(&input, ",")) &&
index++; strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
continue; ret = kstrtol(timeout_setting, 0, &timeout);
} else if (timeout < 0) { if (ret)
timeout = MAX_SCHEDULE_TIMEOUT; return ret;
dev_warn(adev->dev, "lockup timeout disabled");
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
} else {
timeout = msecs_to_jiffies(timeout);
}
switch (index++) { if (timeout == 0) {
case 0: index++;
adev->gfx_timeout = timeout; continue;
break; } else if (timeout < 0) {
case 1: timeout = MAX_SCHEDULE_TIMEOUT;
adev->compute_timeout = timeout; dev_warn(adev->dev, "lockup timeout disabled");
break; add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
case 2: } else {
adev->sdma_timeout = timeout; timeout = msecs_to_jiffies(timeout);
break;
case 3:
adev->video_timeout = timeout;
break;
default:
break;
}
} }
/*
* There is only one value specified and switch (index++) {
* it should apply to all non-compute jobs. case 0:
*/ adev->gfx_timeout = timeout;
if (index == 1) { break;
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; case 1:
if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) adev->compute_timeout = timeout;
adev->compute_timeout = adev->gfx_timeout; break;
case 2:
adev->sdma_timeout = timeout;
break;
case 3:
adev->video_timeout = timeout;
break;
default:
break;
} }
} }
/* When only one value specified apply it to all queues. */
if (index == 1)
adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout =
adev->video_timeout = timeout;
return ret; return ret;
} }

View File

@@ -354,22 +354,16 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint
* DOC: lockup_timeout (string) * DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms. * Set GPU scheduler timeout value in ms.
* *
* The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or * The format can be [single value] for setting all timeouts at once or
* multiple values specified. 0 and negative values are invalidated. They will be adjusted * [GFX,Compute,SDMA,Video] to set individual timeouts.
* to the default timeout. * Negative values mean infinity.
* *
* - With one value specified, the setting will apply to all non-compute jobs. * By default(with no lockup_timeout settings), the timeout for all queues is 2000.
* - With multiple values specified, the first one will be for GFX.
* The second one is for Compute. The third and fourth ones are
* for SDMA and Video.
*
* By default(with no lockup_timeout settings), the timeout for all jobs is 10000.
*/ */
MODULE_PARM_DESC(lockup_timeout, MODULE_PARM_DESC(lockup_timeout,
"GPU lockup timeout in ms (default: 10000 for all jobs. " "GPU lockup timeout in ms (default: 2000. 0: keep default value. negative: infinity timeout), format: [single value for all] or [GFX,Compute,SDMA,Video].");
"0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " module_param_string(lockup_timeout, amdgpu_lockup_timeout,
"for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); sizeof(amdgpu_lockup_timeout), 0444);
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
/** /**
* DOC: dpm (int) * DOC: dpm (int)