summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c108
1 files changed, 108 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
index 8ce5dc6efcf9..e66016e02c1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -21,6 +21,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+#include <linux/list.h>
#include "amdgpu.h"
static const guid_t MCE = CPER_NOTIFY_MCE;
@@ -257,6 +258,113 @@ struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
return hdr;
}
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+ struct aca_bank *bank)
+{
+ struct cper_hdr *fatal = NULL;
+ struct cper_sec_crashdump_reg_data reg_data = { 0 };
+ int ret;
+
+ fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
+ if (!fatal) {
+ dev_err(adev->dev, "fail to alloc cper entry for ue record\n");
+ return -ENOMEM;
+ }
+
+ reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+ reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+ amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
+ ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data);
+ if (ret)
+ return ret;
+
+ /*TODO: commit the cper entry to cper ring */
+
+ return 0;
+}
+
+static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
+ enum aca_error_type aca_err_type)
+{
+ switch (aca_err_type) {
+ case ACA_ERROR_TYPE_UE:
+ return CPER_SEV_FATAL;
+ case ACA_ERROR_TYPE_CE:
+ return CPER_SEV_NON_FATAL_CORRECTED;
+ case ACA_ERROR_TYPE_DEFERRED:
+ return CPER_SEV_NON_FATAL_UNCORRECTED;
+ default:
+ dev_err(adev->dev, "Unknown ACA error type!\n");
+ return CPER_SEV_FATAL;
+ }
+}
+
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+ struct aca_banks *banks,
+ uint16_t bank_count)
+{
+ struct cper_hdr *corrected = NULL;
+ enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
+ uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ uint32_t i = 0;
+ int ret;
+
+ corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
+ if (!corrected) {
+ dev_err(adev->dev, "fail to allocate cper entry for ce records\n");
+ return -ENOMEM;
+ }
+
+ /* Raise severity if any DE is detected in the ACA bank list */
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+ sev = CPER_SEV_NON_FATAL_UNCORRECTED;
+ break;
+ }
+ }
+
+ amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);
+
+ /* Combine CE and UE in cper record */
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+ reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+ reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+ reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+ reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+ reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+ reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+ reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+ ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
+ amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
+ reg_data, CPER_ACA_REG_COUNT);
+ if (ret)
+ return ret;
+ }
+
+ /*TODO: commit the cper entry to cper ring */
+
+ return 0;
+}
+
int amdgpu_cper_init(struct amdgpu_device *adev)
{
mutex_init(&adev->cper.cper_lock);