From 6e8746cb735166eaf3ceb086b31bb0431f5e3532 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 13 Jan 2023 11:27:58 +0800 Subject: EDAC/skx_common: Enable EDAC support for the "near" memory The current {skx,i10nm}_edac miss the EDAC support to decode errors from the 1st level memory (the fast "near" memory as cache) of the 2-level memory system. Introduce a helper function skx_error_in_mem() to check whether errors are from memory at the beginning of skx_mce_check_error(). As long as the errors are from memory (either the 1-level memory system or the 2-level memory system), decode the errors. Reported-and-tested-by: Youquan Song Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20230113032802.41752-1-qiuxu.zhuo@intel.com --- drivers/edac/skx_common.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/edac/skx_common.c') diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index f0f8e98f6efb..5fdcb6d5b96b 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -632,12 +632,18 @@ static bool skx_error_in_1st_level_mem(const struct mce *m) if (!skx_mem_cfg_2lm) return false; - errcode = GET_BITFIELD(m->status, 0, 15); + errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; - if ((errcode & 0xef80) != 0x280) - return false; + return errcode == MCACOD_EXT_MEM_ERR; +} - return true; +static bool skx_error_in_mem(const struct mce *m) +{ + u32 errcode; + + errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; + + return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR); } int skx_mce_check_error(struct notifier_block *nb, unsigned long val, @@ -651,8 +657,8 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; - /* ignore unless this is memory related with an address */ - if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) + /* Ignore unless this is memory related with an address */ + if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; memset(&res, 0, sizeof(res)); -- cgit v1.2.3 From d2415e2e5330fc11f4c688fa518751bdc90259f5 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 13 Jan 2023 11:27:59 +0800 Subject: EDAC/skx_common: Delete duplicated and unreachable code skx_mce_check_error() returns early if the error isn't from memory. So when skx_mce_output_error() is invoked from skx_mce_check_error(), it doesn't need to re-check whether the error is from memory. Delete the duplicated and unreachable code from skx_mce_output_error(). Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20230113032802.41752-1-qiuxu.zhuo@intel.com --- drivers/edac/skx_common.c | 58 +++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 37 deletions(-) (limited to 'drivers/edac/skx_common.c') diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 5fdcb6d5b96b..fd8186fe005c 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -560,44 +560,28 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, tp_event = HW_EVENT_ERR_CORRECTED; } - /* - * According to Intel Architecture spec vol 3B, - * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" - * memory errors should fit one of these masks: - * 000f 0000 1mmm cccc (binary) - * 000f 0010 1mmm cccc (binary) [RAM used as cache] - * where: - * f = Correction Report Filtering Bit. If 1, subsequent errors - * won't be shown - * mmm = error type - * cccc = channel - * If the mask doesn't match, report an error to the parsing logic - */ - if (!((errcode & 0xef80) == 0x80 || (errcode & 0xef80) == 0x280)) { - optype = "Can't parse: it is not a mem"; - } else { - switch (optypenum) { - case 0: - optype = "generic undef request error"; - break; - case 1: - optype = "memory read error"; - break; - case 2: - optype = "memory write error"; - break; - case 3: - optype = "addr/cmd error"; - break; - case 4: - optype = "memory scrubbing error"; - scrub_err = true; - break; - default: - optype = "reserved"; - break; - } + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + scrub_err = true; + break; + default: + optype = "reserved"; + break; } + if (res->decoded_by_adxl) { len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", overflow ? " OVERFLOW" : "", -- cgit v1.2.3