summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorHawking Zhang <Hawking.Zhang@amd.com>2021-04-02 14:39:36 +0800
committerAlex Deucher <alexander.deucher@amd.com>2021-04-09 16:51:04 -0400
commit6e36f23193cc870856a41e87281f62fb2b04bd1f (patch)
treecf35baff233629f14cbce1e3b48f9fe316ffdfbd /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parent87da0cc101e723833446e52971fac5fa7358dec5 (diff)
downloadlinux-stable-6e36f23193cc870856a41e87281f62fb2b04bd1f.tar.gz
linux-stable-6e36f23193cc870856a41e87281f62fb2b04bd1f.tar.bz2
linux-stable-6e36f23193cc870856a41e87281f62fb2b04bd1f.zip
drm/amdgpu: split nbio callbacks into ras and non-ras ones
nbio ras is not managed by gpu driver when gpu is connected to cpu through xgmi. split nbio callbacks into ras and non-ras ones so gpu driver only initializes nbio ras callbacks when it manages nbio ras. Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Dennis Li <Dennis.Li@amd.com> Reviewed-by: John Clements <John.Clements@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c30
1 files changed, 24 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1708045e2a0d..ac3f4c3266bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -804,8 +804,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
adev->mmhub.funcs->query_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__PCIE_BIF:
- if (adev->nbio.funcs->query_ras_error_count)
- adev->nbio.funcs->query_ras_error_count(adev, &err_data);
+ if (adev->nbio.ras_funcs &&
+ adev->nbio.ras_funcs->query_ras_error_count)
+ adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
amdgpu_xgmi_query_ras_error_count(adev, &err_data);
@@ -2030,14 +2031,31 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* Might need get this flag from vbios. */
con->flags = RAS_DEFAULT_FLAGS;
- if (adev->nbio.funcs->init_ras_controller_interrupt) {
- r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
+ /* initialize nbio ras function ahead of any other
+ * ras functions so hardware fatal error interrupt
+ * can be enabled as early as possible */
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ adev->nbio.ras_funcs = &nbio_v7_4_ras_funcs;
+ break;
+ default:
+ /* nbio ras is not available */
+ break;
+ }
+
+ if (adev->nbio.ras_funcs &&
+ adev->nbio.ras_funcs->init_ras_controller_interrupt) {
+ r = adev->nbio.ras_funcs->init_ras_controller_interrupt(adev);
if (r)
goto release_con;
}
- if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
- r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
+ if (adev->nbio.ras_funcs &&
+ adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt) {
+ r = adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt(adev);
if (r)
goto release_con;
}