diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 8ce5dc6efcf9..e66016e02c1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -21,6 +21,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ +#include <linux/list.h> #include "amdgpu.h" static const guid_t MCE = CPER_NOTIFY_MCE; @@ -257,6 +258,113 @@ struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev, return hdr; } +int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev, + struct aca_bank *bank) +{ + struct cper_hdr *fatal = NULL; + struct cper_sec_crashdump_reg_data reg_data = { 0 }; + int ret; + + fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1); + if (!fatal) { + dev_err(adev->dev, "fail to alloc cper entry for ue record\n"); + return -ENOMEM; + } + + reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]); + reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]); + reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]); + reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]); + reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]); + reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]); + reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]); + reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]); + + amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL); + ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data); + if (ret) + return ret; + + /*TODO: commit the cper entry to cper ring */ + + return 0; +} + +static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev, + enum aca_error_type aca_err_type) +{ + switch (aca_err_type) { + case ACA_ERROR_TYPE_UE: + return CPER_SEV_FATAL; + case ACA_ERROR_TYPE_CE: + return CPER_SEV_NON_FATAL_CORRECTED; + case ACA_ERROR_TYPE_DEFERRED: + return CPER_SEV_NON_FATAL_UNCORRECTED; + default: + dev_err(adev->dev, "Unknown ACA error type!\n"); + return CPER_SEV_FATAL; + } +} + +int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev, + struct aca_banks *banks, + uint16_t bank_count) +{ + struct cper_hdr *corrected = NULL; + enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED; + uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 }; + struct aca_bank_node *node; + struct aca_bank *bank; + uint32_t i = 0; + int ret; + + corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count); + if (!corrected) { + dev_err(adev->dev, "fail to allocate cper entry for ce records\n"); + return -ENOMEM; + } + + /* Raise severity if any DE is detected in the ACA bank list */ + list_for_each_entry(node, &banks->list, node) { + bank = &node->bank; + if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) { + sev = CPER_SEV_NON_FATAL_UNCORRECTED; + break; + } + } + + amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev); + + /* Combine CE and UE in cper record */ + list_for_each_entry(node, &banks->list, node) { + bank = &node->bank; + reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]); + reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]); + reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]); + reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]); + reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]); + reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]); + reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]); + reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]); + reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]); + reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]); + reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]); + reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]); + reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]); + reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]); + + ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++, + amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type), + reg_data, CPER_ACA_REG_COUNT); + if (ret) + return ret; + } + + /*TODO: commit the cper entry to cper ring */ + + return 0; +} + int amdgpu_cper_init(struct amdgpu_device *adev) { mutex_init(&adev->cper.cper_lock); |