summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
author: André Almeida <andrealmeid@igalia.com>, 2025-02-26 10:11:18 -0300
committer: Alex Deucher <alexander.deucher@amd.com>, 2025-02-27 16:50:04 -0500
commit9c696cc57c1a6dab6da6b51f4b30a7d16e233cbc (patch)
tree2f2f4a9d6005ffd52136a300084e78f36fa41772 /drivers/gpu/drm/amd/amdgpu
parent63e6a77ccf239337baa9b1e7787cde9fa0462092 (diff)
downloadlinux-9c696cc57c1a6dab6da6b51f4b30a7d16e233cbc.tar.gz
linux-9c696cc57c1a6dab6da6b51f4b30a7d16e233cbc.tar.bz2
linux-9c696cc57c1a6dab6da6b51f4b30a7d16e233cbc.zip
drm/amdgpu: Create a debug option to disable ring reset
Prior to the addition of ring reset, the debug option `debug_disable_soft_recovery` could be used to force a full device reset. Now that we have ring reset, create a debug option to disable it in amdgpu, forcing the driver to go with the full device reset path again when both options are combined. This option is useful for testing and debugging purposes when one wants to test the full reset from userspace.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 6
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 5
3 files changed, 10 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2b1990ea9639..2a9a41f4e748 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1192,6 +1192,7 @@ struct amdgpu_device {
bool debug_use_vram_fw_buf;
bool debug_enable_ras_aca;
bool debug_exp_resets;
+ bool debug_disable_gpu_ring_reset;
bool enforce_isolation[MAX_XCP];
/* Added this mutex for cleaner shader isolation between GFX and compute processes */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 1819166cb4cf..8c1aa9feda53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -137,6 +137,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
+ AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2223,6 +2224,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: enable experimental reset features\n");
adev->debug_exp_resets = true;
}
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) {
+ pr_info("debug: ring reset disabled\n");
+ adev->debug_disable_gpu_ring_reset = true;
+ }
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index c37bc683253a..5537c8bfd227 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -131,8 +131,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
/* attempt a per ring reset */
- if (amdgpu_gpu_recovery &&
- ring->funcs->reset) {
+ if (unlikely(adev->debug_disable_gpu_ring_reset)) {
+ dev_err(adev->dev, "Ring reset disabled by debug mask\n");
+ } else if (amdgpu_gpu_recovery && ring->funcs->reset) {
bool is_guilty;
dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name);