From 1fa8d710573f02ae9118bc5f53e7ede09d6920da Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Fri, 14 Apr 2023 18:39:52 +0800 Subject: drm/amdgpu: Fix desktop freezed after gpu-reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] After gpu-reset, sometimes the driver fails to enable vblank irq, causing flip_done timed out and the desktop freezed. During gpu-reset, we disable and enable vblank irq in dm_suspend() and dm_resume(). Later on in amdgpu_irq_gpu_reset_resume_helper(), we check irqs' refcount and decide to enable or disable the irqs again. However, we have 2 sets of API for controling vblank irq, one is dm_vblank_get/put() and another is amdgpu_irq_get/put(). Each API has its own refcount and flag to store the state of vblank irq, and they are not synchronized. In drm we use the first API to control vblank irq but in amdgpu_irq_gpu_reset_resume_helper() we use the second set of API. The failure happens when vblank irq was enabled by dm_vblank_get() before gpu-reset, we have vblank->enabled true. However, during gpu-reset, in amdgpu_irq_gpu_reset_resume_helper() vblank irq's state checked from amdgpu_irq_update() is DISABLED. So finally it disables vblank irq again. After gpu-reset, if there is a cursor plane commit, the driver will try to enable vblank irq by calling drm_vblank_enable(), but the vblank->enabled is still true, so it fails to turn on vblank irq and causes flip_done can't be completed in vblank irq handler and desktop become freezed. [How] Combining the 2 vblank control APIs by letting drm's API finally calls amdgpu_irq's API, so the irq's refcount and state of both APIs can be synchronized. Also add a check to prevent refcount from being less then 0 in amdgpu_irq_put(). v2: - Add warning in amdgpu_irq_enable() if the irq is already disabled. - Call dc_interrupt_set() in dm_set_vblank() to avoid refcount change if it is in gpu-reset. v3: - Improve commit message and code comments. Signed-off-by: Alan Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 1d924dc51a3e..e3762e806617 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -169,10 +169,21 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) if (rc) return rc; - irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; + if (amdgpu_in_reset(adev)) { + irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; + /* During gpu-reset we disable and then enable vblank irq, so + * don't use amdgpu_irq_get/put() to avoid refcount change. + */ + if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) + rc = -EBUSY; + } else { + rc = (enable) + ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id) + : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id); + } - if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) - return -EBUSY; + if (rc) + return rc; skip: if (amdgpu_in_reset(adev)) -- cgit v1.2.3 From cd465a670087f94e62100622f9cbb894f524268a Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Tue, 2 May 2023 17:54:50 +0800 Subject: drm/amd/display: Fix warning in disabling vblank irq [Why] During gpu-reset, we toggle vblank irq by calling dc_interrupt_set() instead of amdgpu_irq_get/put() because we don't want to change the irq source's refcount. However, we see the warning when vblank irq is enabled by dc_interrupt_set() during gpu-reset but disabled by amdgpu_irq_put() after gpu-reset. [How] Only in dm_gpureset_toggle_interrupts() we toggle vblank interrupts by calling dc_interrupt_set(). Apart from this we call dm_set_vblank() which uses amdgpu_irq_get/put() to operate vblank irq. Reviewed-by: Bhawanpreet Lakha Acked-by: Tom Chung Signed-off-by: Alan Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 21 +++++++++++++-------- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 16 +++------------- 2 files changed, 16 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4edbcc9e535b..f59cb4d659dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2475,20 +2475,25 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, if (acrtc && state->stream_status[i].plane_count != 0) { irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst; rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; - DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n", - acrtc->crtc_id, enable ? "en" : "dis", rc); if (rc) DRM_WARN("Failed to %s pflip interrupts\n", enable ? "enable" : "disable"); if (enable) { - rc = amdgpu_dm_crtc_enable_vblank(&acrtc->base); - if (rc) - DRM_WARN("Failed to enable vblank interrupts\n"); - } else { - amdgpu_dm_crtc_disable_vblank(&acrtc->base); - } + if (amdgpu_dm_crtc_vrr_active(to_dm_crtc_state(acrtc->base.state))) + rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, true); + } else + rc = amdgpu_dm_crtc_set_vupdate_irq(&acrtc->base, false); + if (rc) + DRM_WARN("Failed to %sable vupdate interrupt\n", enable ? "en" : "dis"); + + irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; + /* During gpu-reset we disable and then enable vblank irq, so + * don't use amdgpu_irq_get/put() to avoid refcount change. + */ + if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) + DRM_WARN("Failed to %sable vblank interrupt\n", enable ? "en" : "dis"); } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index e3762e806617..440fc0869a34 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -146,7 +146,6 @@ static void vblank_control_worker(struct work_struct *work) static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) { - enum dc_irq_source irq_source; struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); struct amdgpu_device *adev = drm_to_adev(crtc->dev); struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); @@ -169,18 +168,9 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) if (rc) return rc; - if (amdgpu_in_reset(adev)) { - irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; - /* During gpu-reset we disable and then enable vblank irq, so - * don't use amdgpu_irq_get/put() to avoid refcount change. - */ - if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) - rc = -EBUSY; - } else { - rc = (enable) - ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id) - : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id); - } + rc = (enable) + ? amdgpu_irq_get(adev, &adev->crtc_irq, acrtc->crtc_id) + : amdgpu_irq_put(adev, &adev->crtc_irq, acrtc->crtc_id); if (rc) return rc; -- cgit v1.2.3