summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
diff options
context:
space:
mode:
authorJack Zhang <Jack.Zhang1@amd.com>2021-03-08 12:41:27 +0800
committerAlex Deucher <alexander.deucher@amd.com>2021-04-09 16:45:45 -0400
commite6c6338f393b74ac0b303d567bb918b44ae7ad75 (patch)
tree589404ebe414ab0b537e4bbfe5388dce0e444557 /drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
parent030bb4addb36ee94e286eb51486f990cac433825 (diff)
downloadlinux-e6c6338f393b74ac0b303d567bb918b44ae7ad75.tar.gz
linux-e6c6338f393b74ac0b303d567bb918b44ae7ad75.tar.bz2
linux-e6c6338f393b74ac0b303d567bb918b44ae7ad75.zip
drm/amd/amdgpu implement tdr advanced mode
[Why] Previous tdr design treats the first job in job_timeout as the bad job. But sometimes a later bad compute job can block a good gfx job and cause an unexpected gfx job timeout because gfx and compute ring share internal GC HW mutually. [How] This patch implements an advanced tdr mode.It involves an additinal synchronous pre-resubmit step(Step0 Resubmit) before normal resubmit step in order to find the real bad job. 1. At Step0 Resubmit stage, it synchronously submits and pends for the first job being signaled. If it gets timeout, we identify it as guilty and do hw reset. After that, we would do the normal resubmit step to resubmit left jobs. 2. For whole gpu reset(vram lost), do resubmit as the old way. v2: squash in build fix (Alex) Signed-off-by: Jack Zhang <Jack.Zhang1@amd.com> Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c2
1 files changed, 1 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 02b75fa64d7d..0e10c3958f94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -516,7 +516,7 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
* DOC: gpu_recovery (int)
* Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
*/
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)");
module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
/**