From 9af357bc3e05400eb632f3975986e1eac196f159 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 23 Mar 2023 10:21:49 +0800 Subject: drm/amdgpu: Add fatal error handling in nbio v4_3 The GPU will stop working once a fatal error is detected. It will inform the driver to do a reset to recover from the fatal error. v2: squash in logic fix (Srinivasan) v3: squash in logic fix (Dan) Signed-off-by: Hawking Zhang Reviewed-by: Candice Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c6dc3cd2a9de..4069bce9479f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -34,6 +34,7 @@ #include "amdgpu_atomfirmware.h" #include "amdgpu_xgmi.h" #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" +#include "nbio_v4_3.h" #include "atom.h" #include "amdgpu_reset.h" @@ -2562,6 +2563,16 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (!adev->gmc.xgmi.connected_to_cpu) adev->nbio.ras = &nbio_v7_4_ras; break; + case IP_VERSION(4, 3, 0): + if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF)) + /* unlike other generation of nbio ras, + * nbio v4_3 only support fatal error interrupt + * to inform software that DF is freezed due to + * system fatal error event. driver should not + * enable nbio ras in such case. Instead, + * check DF RAS */ + adev->nbio.ras = &nbio_v4_3_ras; + break; default: /* nbio ras is not available */ break; -- cgit v1.2.3