diff options
author | André Almeida <andrealmeid@igalia.com> | 2025-02-26 10:11:18 -0300 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2025-02-27 16:50:04 -0500 |
commit | 9c696cc57c1a6dab6da6b51f4b30a7d16e233cbc (patch) | |
tree | 2f2f4a9d6005ffd52136a300084e78f36fa41772 /drivers/gpu/drm/amd/amdgpu | |
parent | 63e6a77ccf239337baa9b1e7787cde9fa0462092 (diff) | |
download | linux-9c696cc57c1a6dab6da6b51f4b30a7d16e233cbc.tar.gz linux-9c696cc57c1a6dab6da6b51f4b30a7d16e233cbc.tar.bz2 linux-9c696cc57c1a6dab6da6b51f4b30a7d16e233cbc.zip |
drm/amdgpu: Create a debug option to disable ring reset
Prior to the addition of ring reset, the debug option
`debug_disable_soft_recovery` could be used to force a full device
reset. Now that we have ring reset, create a debug option to disable
them in amdgpu, forcing the driver to go with the full device
reset path again when both options are combined.
This option is useful for testing and debugging purposes when one wants
to test the full reset from userspace.
Signed-off-by: André Almeida <andrealmeid@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 |
3 files changed, 10 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2b1990ea9639..2a9a41f4e748 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1192,6 +1192,7 @@ struct amdgpu_device { bool debug_use_vram_fw_buf; bool debug_enable_ras_aca; bool debug_exp_resets; + bool debug_disable_gpu_ring_reset; bool enforce_isolation[MAX_XCP]; /* Added this mutex for cleaner shader isolation between GFX and compute processes */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1819166cb4cf..8c1aa9feda53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -137,6 +137,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), + AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -2223,6 +2224,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: enable experimental reset features\n"); adev->debug_exp_resets = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) { + pr_info("debug: ring reset disabled\n"); + adev->debug_disable_gpu_ring_reset = true; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index c37bc683253a..5537c8bfd227 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -131,8 +131,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } /* attempt a per ring reset */ - if (amdgpu_gpu_recovery && - ring->funcs->reset) { + if (unlikely(adev->debug_disable_gpu_ring_reset)) { + dev_err(adev->dev, "Ring reset disabled by debug mask\n"); + } else if (amdgpu_gpu_recovery && ring->funcs->reset) { bool is_guilty; dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); |