summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
diff options
context:
space:
mode:
authorXiang Liu <xiang.liu@amd.com>2025-03-19 17:02:49 +0800
committerAlex Deucher <alexander.deucher@amd.com>2025-03-21 12:16:35 -0400
commit338f7412c7ea2ce007e83c5ad7c5e01d8cfce1e1 (patch)
tree2b6d3472e6f6e606f9fc3492e8fa79a23702e5f3 /drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
parent2ec0a7c337fd1087abd5adda638c028f8ae9a989 (diff)
downloadlinux-338f7412c7ea2ce007e83c5ad7c5e01d8cfce1e1.tar.gz
linux-338f7412c7ea2ce007e83c5ad7c5e01d8cfce1e1.tar.bz2
linux-338f7412c7ea2ce007e83c5ad7c5e01d8cfce1e1.zip
drm/amdgpu: Decode deferred error type in gfx aca bank parser
When injecting an uncorrected error with a background workload, the deferred errors among the uncorrected errors need to be identified by checking the deferred and poison bits of the status register. v2: refine checking for deferred error v2: log possible DEs among CEs v2: generate CPER records for DEs among UEs Signed-off-by: Xiang Liu <xiang.liu@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c25
1 files changed, 23 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index ffd4c64e123c..dc47f5fd4ea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -391,6 +391,7 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,
{
struct aca_bank_node *node;
struct aca_bank *bank;
+ int r;
if (!adev->cper.enabled)
return;
@@ -402,11 +403,27 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,
/* UEs must be encoded into separate CPER entries */
if (type == ACA_SMU_TYPE_UE) {
+ struct aca_banks de_banks;
+
+ aca_banks_init(&de_banks);
list_for_each_entry(node, &banks->list, node) {
bank = &node->bank;
- if (amdgpu_cper_generate_ue_record(adev, bank))
- dev_warn(adev->dev, "fail to generate ue cper records\n");
+ if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+ r = aca_banks_add_bank(&de_banks, bank);
+ if (r)
+ dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r);
+ } else {
+ if (amdgpu_cper_generate_ue_record(adev, bank))
+ dev_warn(adev->dev, "fail to generate ue cper records\n");
+ }
+ }
+
+ if (!list_empty(&de_banks.list)) {
+ if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
+ dev_warn(adev->dev, "fail to generate de cper records\n");
}
+
+ aca_banks_release(&de_banks);
} else {
/*
* SMU_TYPE_CE banks are combined into 1 CPER entries,
@@ -541,6 +558,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
if (ret)
return ret;
+ /* DEs may be contained in CEs or UEs */
+ if (type != ACA_ERROR_TYPE_DEFERRED)
+ aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);
+
return aca_log_aca_error(handle, type, err_data);
}