summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2021-12-10 13:52:51 +1000
committerDave Airlie <airlied@redhat.com>2021-12-10 13:52:51 +1000
commitf8eb96b4dfbbbadfb73ee9a1cd0294f9e2762a14 (patch)
tree962db18feaa6ff95fb080062f7bcd9add4c34d0b /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parentc8a04cbeedbc9f71c475141baa656f14f4879792 (diff)
parent70897848730470cc477d5d89e6222c0f6a9ac173 (diff)
downloadlinux-f8eb96b4dfbbbadfb73ee9a1cd0294f9e2762a14.tar.gz
linux-f8eb96b4dfbbbadfb73ee9a1cd0294f9e2762a14.tar.bz2
linux-f8eb96b4dfbbbadfb73ee9a1cd0294f9e2762a14.zip
Merge tag 'amd-drm-next-5.17-2021-12-02' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.17-2021-12-02: amdgpu: - Use generic drm fb helpers - PSR fixes - Rework DCN3.1 clkmgr - DPCD 1.3 fixes - Misc display fixes can cleanups - Clock query fixes for APUs - LTTPR fixes - DSC fixes - Misc PM fixes - RAS fixes - OLED backlight fix - SRIOV fixes - Add STB (Smart Trace Buffer) for supported dGPUs - IH rework - Enable seamless boot for DCN3.01 amdkfd: - Rework more stuff around IP discovery enumeration - Further clean up of interfaces with amdgpu - SVM fixes radeon: - Indentation fixes UAPI: - Add a new KFD header that defines some of the sysfs bitfields and enums that userspace has been using for a while The corresponding bit-fields and enums in user mode are defined in https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/master/include/hsakmttypes.h Signed-off-by: Dave Airlie <airlied@redhat.com> # Conflicts: # drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20211202191643.5970-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c44
1 files changed, 35 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 08133de21fdd..46910e7b2927 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -892,6 +892,38 @@ void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
}
}
+static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int ret = 0;
+
+ /*
+ * choosing right query method according to
+ * whether smu support query error information
+ */
+ ret = smu_get_ecc_info(&adev->smu, (void *)&(ras->umc_ecc));
+ if (ret == -EOPNOTSUPP) {
+ if (adev->umc.ras_funcs &&
+ adev->umc.ras_funcs->query_ras_error_count)
+ adev->umc.ras_funcs->query_ras_error_count(adev, err_data);
+
+ /* umc query_ras_error_address is also responsible for clearing
+ * error status
+ */
+ if (adev->umc.ras_funcs &&
+ adev->umc.ras_funcs->query_ras_error_address)
+ adev->umc.ras_funcs->query_ras_error_address(adev, err_data);
+ } else if (!ret) {
+ if (adev->umc.ras_funcs &&
+ adev->umc.ras_funcs->ecc_info_query_ras_error_count)
+ adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data);
+
+ if (adev->umc.ras_funcs &&
+ adev->umc.ras_funcs->ecc_info_query_ras_error_address)
+ adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data);
+ }
+}
+
/* query/inject/cure begin */
int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
struct ras_query_if *info)
@@ -905,15 +937,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__UMC:
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_count)
- adev->umc.ras_funcs->query_ras_error_count(adev, &err_data);
- /* umc query_ras_error_address is also responsible for clearing
- * error status
- */
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_address)
- adev->umc.ras_funcs->query_ras_error_address(adev, &err_data);
+ amdgpu_ras_get_ecc_info(adev, &err_data);
break;
case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->query_ras_error_count) {
@@ -1935,9 +1959,11 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
if (!con || !con->eh_data)
return 0;
+ mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
save_count = data->count - control->ras_num_recs;
+ mutex_unlock(&con->recovery_lock);
/* only new entries are saved */
if (save_count > 0) {
if (amdgpu_ras_eeprom_append(control,