diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-16 18:44:39 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-16 18:44:39 -0700 |
commit | 1cc3880a3c99f1be73f9024bd9db6248ffdcec70 (patch) | |
tree | 126762bc7ef642bda9ba234be65b47ebcd9f9d47 | |
parent | 490e142209da64243577a997b6d2ed050684ef7b (diff) | |
parent | 3f37a36b6282621d7c5a99b6911275f989766996 (diff) | |
download | linux-1cc3880a3c99f1be73f9024bd9db6248ffdcec70.tar.gz linux-1cc3880a3c99f1be73f9024bd9db6248ffdcec70.tar.bz2 linux-1cc3880a3c99f1be73f9024bd9db6248ffdcec70.zip |
Merge tag 'edac_for_4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
Pull EDAC updates from Borislav Petkov:
"It was pretty busy in EDAC land this time:
- Altera Arria10 L2 cache and On-Chip RAM ECC handling (Thor Thayer)
- Remove ad-hoc buffering of MCE records in sb_edac and i7core_edac
(Tony Luck)
- Do not register sb_edac with pci_register_driver() (Tony Luck)
- Add support for Skylake to ie31200_edac (Jason Baron)
- Do not register amd64_edac with pci_register_driver() (Borislav
Petkov)
... plus the usual round of cleanups and fixes all over the place"
* tag 'edac_for_4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (25 commits)
EDAC, amd64_edac: Drop pci_register_driver() use
EDAC, ie31200_edac: Add Skylake support
EDAC, sb_edac: Use cpu family/model in driver detection
EDAC, i7core: Remove double buffering of error records
EDAC, amd64_edac: Issue driver banner only on success
ARM: socfpga: Initialize Arria10 OCRAM ECC on startup
EDAC: Increment correct counter in edac_inc_ue_error()
EDAC, sb_edac: Remove double buffering of error records
EDAC: Fix used after kfree() error in edac_unregister_sysfs()
EDAC, altera: Avoid unused function warnings
EDAC, altera: Remove useless casts
ARM: socfpga: Enable Arria10 OCRAM ECC on startup
EDAC, altera: Add Arria10 OCRAM ECC support
Documentation: dt: socfpga: Add Altera Arria10 OCRAM binding
EDAC, altera: Make OCRAM ECC dependency check generic
EDAC, altera: Add register offset for ECC Enable
EDAC, altera: Extract error inject operations to a struct fops
ARM: socfpga: Enable Arria10 L2 cache ECC on startup
EDAC, altera: Add Arria10 L2 Cache ECC handling
Documentation, dt, socfpga: Add Altera Arria10 L2 cache binding
...
-rw-r--r-- | Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt | 50 | ||||
-rw-r--r-- | arch/arm/mach-socfpga/core.h | 2 | ||||
-rw-r--r-- | arch/arm/mach-socfpga/l2_cache.c | 49 | ||||
-rw-r--r-- | arch/arm/mach-socfpga/ocram.c | 133 | ||||
-rw-r--r-- | arch/arm/mach-socfpga/socfpga.c | 12 | ||||
-rw-r--r-- | drivers/edac/Kconfig | 5 | ||||
-rw-r--r-- | drivers/edac/altera_edac.c | 412 | ||||
-rw-r--r-- | drivers/edac/altera_edac.h | 128 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 129 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 2 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 2 | ||||
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 3 | ||||
-rw-r--r-- | drivers/edac/i7core_edac.c | 81 | ||||
-rw-r--r-- | drivers/edac/ie31200_edac.c | 121 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 222 |
15 files changed, 899 insertions, 452 deletions
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt index 885f93d14ef9..5a6b16070a33 100644 --- a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt +++ b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt @@ -3,6 +3,7 @@ This driver uses the EDAC framework to implement the SOCFPGA ECC Manager. The ECC Manager counts and corrects single bit errors and counts/handles double bit errors which are uncorrectable. +Cyclone5 and Arria5 ECC Manager Required Properties: - compatible : Should be "altr,socfpga-ecc-manager" - #address-cells: must be 1 @@ -47,3 +48,52 @@ Example: interrupts = <0 178 1>, <0 179 1>; }; }; + +Arria10 SoCFPGA ECC Manager +The Arria10 SoC ECC Manager handles the IRQs for each peripheral +in a shared register instead of individual IRQs like the Cyclone5 +and Arria5. Therefore the device tree is different as well. + +Required Properties: +- compatible : Should be "altr,socfpga-a10-ecc-manager" +- altr,sysgr-syscon : phandle to Arria10 System Manager Block + containing the ECC manager registers. +- #address-cells: must be 1 +- #size-cells: must be 1 +- interrupts : Should be single bit error interrupt, then double bit error + interrupt. Note the rising edge type. +- ranges : standard definition, should translate from local addresses + +Subcomponents: + +L2 Cache ECC +Required Properties: +- compatible : Should be "altr,socfpga-a10-l2-ecc" +- reg : Address and size for ECC error interrupt clear registers. + +On-Chip RAM ECC +Required Properties: +- compatible : Should be "altr,socfpga-a10-ocram-ecc" +- reg : Address and size for ECC block registers. + +Example: + + eccmgr: eccmgr@ffd06000 { + compatible = "altr,socfpga-a10-ecc-manager"; + altr,sysmgr-syscon = <&sysmgr>; + #address-cells = <1>; + #size-cells = <1>; + interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>, + <0 0 IRQ_TYPE_LEVEL_HIGH>; + ranges; + + l2-ecc@ffd06010 { + compatible = "altr,socfpga-a10-l2-ecc"; + reg = <0xffd06010 0x4>; + }; + + ocram-ecc@ff8c3000 { + compatible = "altr,socfpga-a10-ocram-ecc"; + reg = <0xff8c3000 0x90>; + }; + }; diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index 575195be6687..65e1817d8afe 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -38,6 +38,8 @@ extern void socfpga_init_clocks(void); extern void socfpga_sysmgr_init(void); void socfpga_init_l2_ecc(void); void socfpga_init_ocram_ecc(void); +void socfpga_init_arria10_l2_ecc(void); +void socfpga_init_arria10_ocram_ecc(void); extern void __iomem *sys_manager_base_addr; extern void __iomem *rst_manager_base_addr; diff --git a/arch/arm/mach-socfpga/l2_cache.c b/arch/arm/mach-socfpga/l2_cache.c index e3907ab58d05..4267c95f2158 100644 --- a/arch/arm/mach-socfpga/l2_cache.c +++ b/arch/arm/mach-socfpga/l2_cache.c @@ -17,6 +17,20 @@ #include <linux/of_platform.h> #include <linux/of_address.h> +#include "core.h" + +/* A10 System Manager L2 ECC Control register */ +#define A10_MPU_CTRL_L2_ECC_OFST 0x0 +#define A10_MPU_CTRL_L2_ECC_EN BIT(0) + +/* A10 System Manager Global IRQ Mask register */ +#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define A10_SYSMGR_ECC_INTMASK_CLR_L2 BIT(0) + +/* A10 System Manager L2 ECC IRQ Clear register */ +#define A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST 0xA8 +#define A10_SYSMGR_MPU_CLEAR_L2_ECC (BIT(31) | BIT(15)) + void socfpga_init_l2_ecc(void) { struct device_node *np; @@ -39,3 +53,38 @@ void socfpga_init_l2_ecc(void) writel(0x01, mapped_l2_edac_addr); iounmap(mapped_l2_edac_addr); } + +void socfpga_init_arria10_l2_ecc(void) +{ + struct device_node *np; + void __iomem *mapped_l2_edac_addr; + + /* Find the L2 EDAC device tree node */ + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-l2-ecc"); + if (!np) { + pr_err("Unable to find socfpga-a10-l2-ecc in dtb\n"); + return; + } + + mapped_l2_edac_addr = of_iomap(np, 0); + of_node_put(np); + if (!mapped_l2_edac_addr) { + pr_err("Unable to find L2 ECC mapping in dtb\n"); + return; + } + + if (!sys_manager_base_addr) { + pr_err("System Mananger not mapped for L2 ECC\n"); + goto exit; + } + /* Clear any pending IRQs */ + writel(A10_SYSMGR_MPU_CLEAR_L2_ECC, (sys_manager_base_addr + + A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST)); + /* Enable ECC */ + writel(A10_SYSMGR_ECC_INTMASK_CLR_L2, sys_manager_base_addr + + A10_SYSMGR_ECC_INTMASK_CLR_OFST); + writel(A10_MPU_CTRL_L2_ECC_EN, mapped_l2_edac_addr + + A10_MPU_CTRL_L2_ECC_OFST); +exit: + iounmap(mapped_l2_edac_addr); +} diff --git a/arch/arm/mach-socfpga/ocram.c b/arch/arm/mach-socfpga/ocram.c index 60ec643ac2be..10d673252395 100644 --- a/arch/arm/mach-socfpga/ocram.c +++ b/arch/arm/mach-socfpga/ocram.c @@ -13,12 +13,15 @@ * You should have received a copy of the GNU General Public License along with * this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/delay.h> #include <linux/io.h> #include <linux/genalloc.h> #include <linux/module.h> #include <linux/of_address.h> #include <linux/of_platform.h> +#include "core.h" + #define ALTR_OCRAM_CLEAR_ECC 0x00000018 #define ALTR_OCRAM_ECC_EN 0x00000019 @@ -47,3 +50,133 @@ void socfpga_init_ocram_ecc(void) iounmap(mapped_ocr_edac_addr); } + +/* Arria10 OCRAM Section */ +#define ALTR_A10_ECC_CTRL_OFST 0x08 +#define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0)) +#define ALTR_A10_ECC_INITA BIT(16) + +#define ALTR_A10_ECC_INITSTAT_OFST 0x0C +#define ALTR_A10_ECC_INITCOMPLETEA BIT(0) +#define ALTR_A10_ECC_INITCOMPLETEB BIT(8) + +#define ALTR_A10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_A10_ECC_SERRINTEN BIT(0) + +#define ALTR_A10_ECC_INTSTAT_OFST 0x20 +#define ALTR_A10_ECC_SERRPENA BIT(0) +#define ALTR_A10_ECC_DERRPENA BIT(8) +#define ALTR_A10_ECC_ERRPENA_MASK (ALTR_A10_ECC_SERRPENA | \ + ALTR_A10_ECC_DERRPENA) +/* ECC Manager Defines */ +#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 +#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) + +#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 + +static inline void ecc_set_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value |= bit_mask; + writel(value, ioaddr); +} + +static inline void ecc_clear_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + value &= ~bit_mask; + writel(value, ioaddr); +} + +static inline int ecc_test_bits(u32 bit_mask, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr); + + return (value & bit_mask) ? 1 : 0; +} + +/* + * This function uses the memory initialization block in the Arria10 ECC + * controller to initialize/clear the entire memory data and ECC data. + */ +static int altr_init_memory_port(void __iomem *ioaddr) +{ + int limit = ALTR_A10_ECC_INIT_WATCHDOG_10US; + + ecc_set_bits(ALTR_A10_ECC_INITA, (ioaddr + ALTR_A10_ECC_CTRL_OFST)); + while (limit--) { + if (ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA, + (ioaddr + ALTR_A10_ECC_INITSTAT_OFST))) + break; + udelay(1); + } + if (limit < 0) + return -EBUSY; + + /* Clear any pending ECC interrupts */ + writel(ALTR_A10_ECC_ERRPENA_MASK, + (ioaddr + ALTR_A10_ECC_INTSTAT_OFST)); + + return 0; +} + +void socfpga_init_arria10_ocram_ecc(void) +{ + struct device_node *np; + int ret = 0; + void __iomem *ecc_block_base; + + if (!sys_manager_base_addr) { + pr_err("SOCFPGA: sys-mgr is not initialized\n"); + return; + } + + /* Find the OCRAM EDAC device tree node */ + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-ocram-ecc"); + if (!np) { + pr_err("Unable to find socfpga-a10-ocram-ecc\n"); + return; + } + + /* Map the ECC Block */ + ecc_block_base = of_iomap(np, 0); + of_node_put(np); + if (!ecc_block_base) { + pr_err("Unable to map OCRAM ECC block\n"); + return; + } + + /* Disable ECC */ + writel(ALTR_A10_OCRAM_ECC_EN_CTL, + sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_SET_OFST); + ecc_clear_bits(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST)); + ecc_clear_bits(ALTR_A10_OCRAM_ECC_EN_CTL, + (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); + + /* Ensure all writes complete */ + wmb(); + + /* Use HW initialization block to initialize memory for ECC */ + ret = altr_init_memory_port(ecc_block_base); + if (ret) { + pr_err("ECC: cannot init OCRAM PORTA memory\n"); + goto exit; + } + + /* Enable ECC */ + ecc_set_bits(ALTR_A10_OCRAM_ECC_EN_CTL, + (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); + ecc_set_bits(ALTR_A10_ECC_SERRINTEN, + (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST)); + writel(ALTR_A10_OCRAM_ECC_EN_CTL, + sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_CLR_OFST); + + /* Ensure all writes complete */ + wmb(); +exit: + iounmap(ecc_block_base); +} diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index 7e0aad2ec3d1..dde14f7bf2c3 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -66,6 +66,16 @@ static void __init socfpga_init_irq(void) socfpga_init_ocram_ecc(); } +static void __init socfpga_arria10_init_irq(void) +{ + irqchip_init(); + socfpga_sysmgr_init(); + if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C)) + socfpga_init_arria10_l2_ecc(); + if (IS_ENABLED(CONFIG_EDAC_ALTERA_OCRAM)) + socfpga_init_arria10_ocram_ecc(); +} + static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd) { u32 temp; @@ -113,7 +123,7 @@ static const char *altera_a10_dt_match[] = { DT_MACHINE_START(SOCFPGA_A10, "Altera SOCFPGA Arria10") .l2c_aux_val = 0, .l2c_aux_mask = ~0, - .init_irq = socfpga_init_irq, + .init_irq = socfpga_arria10_init_irq, .restart = socfpga_arria10_restart, .dt_compat = altera_a10_dt_match, MACHINE_END diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 37755e63cc28..6ca7474baf4a 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -378,12 +378,11 @@ config EDAC_ALTERA config EDAC_ALTERA_L2C bool "Altera L2 Cache ECC" - depends on EDAC_ALTERA=y - select CACHE_L2X0 + depends on EDAC_ALTERA=y && CACHE_L2X0 help Support for error detection and correction on the Altera L2 cache Memory for Altera SoCs. This option - requires L2 cache so it will force that selection. + requires L2 cache. config EDAC_ALTERA_OCRAM bool "Altera On-Chip RAM ECC" diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 63e42098726d..5b4d223d6d68 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/mfd/syscon.h> +#include <linux/of_address.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/regmap.h> @@ -78,27 +79,6 @@ static const struct altr_sdram_prv_data a10_data = { .ue_set_mask = A10_DIAGINT_TDERRA_MASK, }; -/************************** EDAC Device Defines **************************/ - -/* OCRAM ECC Management Group Defines */ -#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04 -#define ALTR_OCR_ECC_EN BIT(0) -#define ALTR_OCR_ECC_INJS BIT(1) -#define ALTR_OCR_ECC_INJD BIT(2) -#define ALTR_OCR_ECC_SERR BIT(3) -#define ALTR_OCR_ECC_DERR BIT(4) - -/* L2 ECC Management Group Defines */ -#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00 -#define ALTR_L2_ECC_EN BIT(0) -#define ALTR_L2_ECC_INJS BIT(1) -#define ALTR_L2_ECC_INJD BIT(2) - -#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */ -#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */ -#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */ -#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */ - /*********************** EDAC Memory Controller Functions ****************/ /* The SDRAM controller uses the EDAC Memory Controller framework. */ @@ -252,8 +232,8 @@ static unsigned long get_total_mem(void) } static const struct of_device_id altr_sdram_ctrl_of_match[] = { - { .compatible = "altr,sdram-edac", .data = (void *)&c5_data}, - { .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data}, + { .compatible = "altr,sdram-edac", .data = &c5_data}, + { .compatible = "altr,sdram-edac-a10", .data = &a10_data}, {}, }; MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); @@ -570,28 +550,8 @@ module_platform_driver(altr_edac_driver); const struct edac_device_prv_data ocramecc_data; const struct edac_device_prv_data l2ecc_data; - -struct edac_device_prv_data { - int (*setup)(struct platform_device *pdev, void __iomem *base); - int ce_clear_mask; - int ue_clear_mask; - char dbgfs_name[20]; - void * (*alloc_mem)(size_t size, void **other); - void (*free_mem)(void *p, size_t size, void *other); - int ecc_enable_mask; - int ce_set_mask; - int ue_set_mask; - int trig_alloc_sz; -}; - -struct altr_edac_device_dev { - void __iomem *base; - int sb_irq; - int db_irq; - const struct edac_device_prv_data *data; - struct dentry *debugfs_dir; - char *edac_dev_name; -}; +const struct edac_device_prv_data a10_ocramecc_data; +const struct edac_device_prv_data a10_l2ecc_data; static irqreturn_t altr_edac_device_handler(int irq, void *dev_id) { @@ -665,8 +625,9 @@ static ssize_t altr_edac_device_trig(struct file *file, if (ACCESS_ONCE(ptemp[i])) result = -1; /* Toggle Error bit (it is latched), leave ECC enabled */ - writel(error_mask, drvdata->base); - writel(priv->ecc_enable_mask, drvdata->base); + writel(error_mask, (drvdata->base + priv->set_err_ofst)); + writel(priv->ecc_enable_mask, (drvdata->base + + priv->set_err_ofst)); ptemp[i] = i; } /* Ensure it has been written out */ @@ -694,6 +655,16 @@ static const struct file_operations altr_edac_device_inject_fops = { .llseek = generic_file_llseek, }; +static ssize_t altr_edac_a10_device_trig(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos); + +static const struct file_operations altr_edac_a10_device_inject_fops = { + .open = simple_open, + .write = altr_edac_a10_device_trig, + .llseek = generic_file_llseek, +}; + static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, const struct edac_device_prv_data *priv) { @@ -708,17 +679,18 @@ static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR, drvdata->debugfs_dir, edac_dci, - &altr_edac_device_inject_fops)) + priv->inject_fops)) debugfs_remove_recursive(drvdata->debugfs_dir); } static const struct of_device_id altr_edac_device_of_match[] = { #ifdef CONFIG_EDAC_ALTERA_L2C - { .compatible = "altr,socfpga-l2-ecc", .data = (void *)&l2ecc_data }, + { .compatible = "altr,socfpga-l2-ecc", .data = &l2ecc_data }, + { .compatible = "altr,socfpga-a10-l2-ecc", .data = &a10_l2ecc_data }, #endif #ifdef CONFIG_EDAC_ALTERA_OCRAM - { .compatible = "altr,socfpga-ocram-ecc", - .data = (void *)&ocramecc_data }, + { .compatible = "altr,socfpga-ocram-ecc", .data = &ocramecc_data }, + { .compatible = "altr,socfpga-a10-ocram-ecc", .data = &a10_ocramecc_data }, #endif {}, }; @@ -789,7 +761,7 @@ static int altr_edac_device_probe(struct platform_device *pdev) /* Check specific dependencies for the module */ if (drvdata->data->setup) { - res = drvdata->data->setup(pdev, drvdata->base); + res = drvdata->data->setup(drvdata); if (res) goto fail1; } @@ -856,6 +828,25 @@ module_platform_driver(altr_edac_device_driver); /*********************** OCRAM EDAC Device Functions *********************/ #ifdef CONFIG_EDAC_ALTERA_OCRAM +/* + * Test for memory's ECC dependencies upon entry because platform specific + * startup should have initialized the memory and enabled the ECC. + * Can't turn on ECC here because accessing un-initialized memory will + * cause CE/UE errors possibly causing an ABORT. + */ +static int altr_check_ecc_deps(struct altr_edac_device_dev *device) +{ + void __iomem *base = device->base; + const struct edac_device_prv_data *prv = device->data; + + if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask) + return 0; + + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: No ECC present or ECC disabled.\n", + device->edac_dev_name); + return -ENODEV; +} static void *ocram_alloc_mem(size_t size, void **other) { @@ -891,36 +882,53 @@ static void ocram_free_mem(void *p, size_t size, void *other) gen_pool_free((struct gen_pool *)other, (u32)p, size); } -/* - * altr_ocram_check_deps() - * Test for OCRAM cache ECC dependencies upon entry because - * platform specific startup should have initialized the - * On-Chip RAM memory and enabled the ECC. - * Can't turn on ECC here because accessing un-initialized - * memory will cause CE/UE errors possibly causing an ABORT. - */ -static int altr_ocram_check_deps(struct platform_device *pdev, - void __iomem *base) +static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci, + bool sberr) { - if (readl(base) & ALTR_OCR_ECC_EN) - return 0; + void __iomem *base = dci->base; - edac_printk(KERN_ERR, EDAC_DEVICE, - "OCRAM: No ECC present or ECC disabled.\n"); - return -ENODEV; + if (sberr) { + writel(ALTR_A10_ECC_SERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); + } else { + writel(ALTR_A10_ECC_DERRPENA, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + } + return IRQ_HANDLED; } const struct edac_device_prv_data ocramecc_data = { - .setup = altr_ocram_check_deps, + .setup = altr_check_ecc_deps, .ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR), .ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR), .dbgfs_name = "altr_ocram_trigger", .alloc_mem = ocram_alloc_mem, .free_mem = ocram_free_mem, .ecc_enable_mask = ALTR_OCR_ECC_EN, + .ecc_en_ofst = ALTR_OCR_ECC_REG_OFFSET, .ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS), .ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD), + .set_err_ofst = ALTR_OCR_ECC_REG_OFFSET, .trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE, + .inject_fops = &altr_edac_device_inject_fops, +}; + +const struct edac_device_prv_data a10_ocramecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_OCRAM, + .dbgfs_name = "altr_ocram_trigger", + .ecc_enable_mask = ALTR_A10_OCRAM_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_OCRAM */ @@ -966,10 +974,13 @@ static void l2_free_mem(void *p, size_t size, void *other) * Bail if ECC is not enabled. * Note that L2 Cache Enable is forced at build time. */ -static int altr_l2_check_deps(struct platform_device *pdev, - void __iomem *base) +static int altr_l2_check_deps(struct altr_edac_device_dev *device) { - if (readl(base) & ALTR_L2_ECC_EN) + void __iomem *base = device->base; + const struct edac_device_prv_data *prv = device->data; + + if ((readl(base) & prv->ecc_enable_mask) == + prv->ecc_enable_mask) return 0; edac_printk(KERN_ERR, EDAC_DEVICE, @@ -977,6 +988,24 @@ static int altr_l2_check_deps(struct platform_device *pdev, return -ENODEV; } +static irqreturn_t altr_edac_a10_l2_irq(struct altr_edac_device_dev *dci, + bool sberr) +{ + if (sberr) { + regmap_write(dci->edac->ecc_mgr_map, + A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST, + A10_SYSGMR_MPU_CLEAR_L2_ECC_SB); + edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name); + } else { + regmap_write(dci->edac->ecc_mgr_map, + A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST, + A10_SYSGMR_MPU_CLEAR_L2_ECC_MB); + edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name); + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + } + return IRQ_HANDLED; +} + const struct edac_device_prv_data l2ecc_data = { .setup = altr_l2_check_deps, .ce_clear_mask = 0, @@ -987,11 +1016,252 @@ const struct edac_device_prv_data l2ecc_data = { .ecc_enable_mask = ALTR_L2_ECC_EN, .ce_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJS), .ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD), + .set_err_ofst = ALTR_L2_ECC_REG_OFFSET, + .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, + .inject_fops = &altr_edac_device_inject_fops, +}; + +const struct edac_device_prv_data a10_l2ecc_data = { + .setup = altr_l2_check_deps, + .ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR, + .ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR, + .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_L2, + .dbgfs_name = "altr_l2_trigger", + .alloc_mem = l2_alloc_mem, + .free_mem = l2_free_mem, + .ecc_enable_mask = ALTR_A10_L2_ECC_EN_CTL, + .ce_set_mask = ALTR_A10_L2_ECC_CE_INJ_MASK, + .ue_set_mask = ALTR_A10_L2_ECC_UE_INJ_MASK, + .set_err_ofst = ALTR_A10_L2_ECC_INJ_OFST, + .ecc_irq_handler = altr_edac_a10_l2_irq, .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE, + .inject_fops = &altr_edac_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_L2C */ +/********************* Arria10 EDAC Device Functions *************************/ + +/* + * The Arria10 EDAC Device Functions differ from the Cyclone5/Arria5 + * because 2 IRQs are shared among the all ECC peripherals. The ECC + * manager manages the IRQs and the children. + * Based on xgene_edac.c peripheral code. + */ + +static ssize_t altr_edac_a10_device_trig(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dci = file->private_data; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void __iomem *set_addr = (drvdata->base + priv->set_err_ofst); + unsigned long flags; + u8 trig_type; + + if (!user_buf || get_user(trig_type, user_buf)) + return -EFAULT; + + local_irq_save(flags); + if (trig_type == ALTR_UE_TRIGGER_CHAR) + writel(priv->ue_set_mask, set_addr); + else + writel(priv->ce_set_mask, set_addr); + /* Ensure the interrupt test bits are set */ + wmb(); + local_irq_restore(flags); + + return count; +} + +static irqreturn_t altr_edac_a10_irq_handler(int irq, void *dev_id) +{ + irqreturn_t rc = IRQ_NONE; + struct altr_arria10_edac *edac = dev_id; + struct altr_edac_device_dev *dci; + int irq_status; + bool sberr = (irq == edac->sb_irq) ? 1 : 0; + int sm_offset = sberr ? A10_SYSMGR_ECC_INTSTAT_SERR_OFST : + A10_SYSMGR_ECC_INTSTAT_DERR_OFST; + + regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status); + + if ((irq != edac->sb_irq) && (irq != edac->db_irq)) { + WARN_ON(1); + } else { + list_for_each_entry(dci, &edac->a10_ecc_devices, next) { + if (irq_status & dci->data->irq_status_mask) + rc = dci->data->ecc_irq_handler(dci, sberr); + } + } + + return rc; +} + +static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, + struct device_node *np) +{ + struct edac_device_ctl_info *dci; + struct altr_edac_device_dev *altdev; + char *ecc_name = (char *)np->name; + struct resource res; + int edac_idx; + int rc = 0; + const struct edac_device_prv_data *prv; + /* Get matching node and check for valid result */ + const struct of_device_id *pdev_id = + of_match_node(altr_edac_device_of_match, np); + if (IS_ERR_OR_NULL(pdev_id)) + return -ENODEV; + + /* Get driver specific data for this EDAC device */ + prv = pdev_id->data; + if (IS_ERR_OR_NULL(prv)) + return -ENODEV; + + if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL)) + return -ENOMEM; + + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: no resource address\n", ecc_name); + goto err_release_group; + } + + edac_idx = edac_device_alloc_index(); + dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name, + 1, ecc_name, 1, 0, NULL, 0, + edac_idx); + + if (!dci) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: Unable to allocate EDAC device\n", ecc_name); + rc = -ENOMEM; + goto err_release_group; + } + + altdev = dci->pvt_info; + dci->dev = edac->dev; + altdev->edac_dev_name = ecc_name; + altdev->edac_idx = edac_idx; + altdev->edac = edac; + altdev->edac_dev = dci; + altdev->data = prv; + altdev->ddev = *edac->dev; + dci->dev = &altdev->ddev; + dci->ctl_name = "Altera ECC Manager"; + dci->mod_name = ecc_name; + dci->dev_name = ecc_name; + + altdev->base = devm_ioremap_resource(edac->dev, &res); + if (IS_ERR(altdev->base)) { + rc = PTR_ERR(altdev->base); + goto err_release_group1; + } + + /* Check specific dependencies for the module */ + if (altdev->data->setup) { + rc = altdev->data->setup(altdev); + if (rc) + goto err_release_group1; + } + + rc = edac_device_add_device(dci); + if (rc) { + dev_err(edac->dev, "edac_device_add_device failed\n"); + rc = -ENOMEM; + goto err_release_group1; + } + + altr_create_edacdev_dbgfs(dci, prv); + + list_add(&altdev->next, &edac->a10_ecc_devices); + + devres_remove_group(edac->dev, altr_edac_a10_device_add); + + return 0; + +err_release_group1: + edac_device_free_ctl_info(dci); +err_release_group: + edac_printk(KERN_ALERT, EDAC_DEVICE, "%s: %d\n", __func__, __LINE__); + devres_release_group(edac->dev, NULL); + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s:Error setting up EDAC device: %d\n", ecc_name, rc); + + return rc; +} + +static int altr_edac_a10_probe(struct platform_device *pdev) +{ + struct altr_arria10_edac *edac; + struct device_node *child; + int rc; + + edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); + if (!edac) + return -ENOMEM; + + edac->dev = &pdev->dev; + platform_set_drvdata(pdev, edac); + INIT_LIST_HEAD(&edac->a10_ecc_devices); + + edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "altr,sysmgr-syscon"); + if (IS_ERR(edac->ecc_mgr_map)) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to get syscon altr,sysmgr-syscon\n"); + return PTR_ERR(edac->ecc_mgr_map); + } + + edac->sb_irq = platform_get_irq(pdev, 0); + rc = devm_request_irq(&pdev->dev, edac->sb_irq, + altr_edac_a10_irq_handler, + IRQF_SHARED, dev_name(&pdev->dev), edac); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n"); + return rc; + } + + edac->db_irq = platform_get_irq(pdev, 1); + rc = devm_request_irq(&pdev->dev, edac->db_irq, + altr_edac_a10_irq_handler, + IRQF_SHARED, dev_name(&pdev->dev), edac); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); + return rc; + } + + for_each_child_of_node(pdev->dev.of_node, child) { + if (!of_device_is_available(child)) + continue; + if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc")) + altr_edac_a10_device_add(edac, child); + else if (of_device_is_compatible(child, + "altr,socfpga-a10-ocram-ecc")) + altr_edac_a10_device_add(edac, child); + } + + return 0; +} + +static const struct of_device_id altr_edac_a10_of_match[] = { + { .compatible = "altr,socfpga-a10-ecc-manager" }, + {}, +}; +MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match); + +static struct platform_driver altr_edac_a10_driver = { + .probe = altr_edac_a10_probe, + .driver = { + .name = "socfpga_a10_ecc_manager", + .of_match_table = altr_edac_a10_of_match, + }, +}; +module_platform_driver(altr_edac_a10_driver); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Thor Thayer"); MODULE_DESCRIPTION("EDAC Driver for Altera Memories"); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 953077d3e4f3..42090f36ba6e 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -195,4 +195,132 @@ struct altr_sdram_mc_data { const struct altr_sdram_prv_data *data; }; +/************************** EDAC Device Defines **************************/ +/***** General Device Trigger Defines *****/ +#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */ +#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */ +#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */ +#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */ + +/******* Cyclone5 and Arria5 Defines *******/ +/* OCRAM ECC Management Group Defines */ +#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04 +#define ALTR_OCR_ECC_REG_OFFSET 0x00 +#define ALTR_OCR_ECC_EN BIT(0) +#define ALTR_OCR_ECC_INJS BIT(1) +#define ALTR_OCR_ECC_INJD BIT(2) +#define ALTR_OCR_ECC_SERR BIT(3) +#define ALTR_OCR_ECC_DERR BIT(4) + +/* L2 ECC Management Group Defines */ +#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00 +#define ALTR_L2_ECC_REG_OFFSET 0x00 +#define ALTR_L2_ECC_EN BIT(0) +#define ALTR_L2_ECC_INJS BIT(1) +#define ALTR_L2_ECC_INJD BIT(2) + +/* Arria10 General ECC Block Module Defines */ +#define ALTR_A10_ECC_CTRL_OFST 0x08 +#define ALTR_A10_ECC_EN BIT(0) +#define ALTR_A10_ECC_INITA BIT(16) +#define ALTR_A10_ECC_INITB BIT(24) + +#define ALTR_A10_ECC_INITSTAT_OFST 0x0C +#define ALTR_A10_ECC_INITCOMPLETEA BIT(0) +#define ALTR_A10_ECC_INITCOMPLETEB BIT(8) + +#define ALTR_A10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_A10_ECC_SERRINTEN BIT(0) + +#define ALTR_A10_ECC_INTSTAT_OFST 0x20 +#define ALTR_A10_ECC_SERRPENA BIT(0) +#define ALTR_A10_ECC_DERRPENA BIT(8) +#define ALTR_A10_ECC_ERRPENA_MASK (ALTR_A10_ECC_SERRPENA | \ + ALTR_A10_ECC_DERRPENA) +#define ALTR_A10_ECC_SERRPENB BIT(16) +#define ALTR_A10_ECC_DERRPENB BIT(24) +#define ALTR_A10_ECC_ERRPENB_MASK (ALTR_A10_ECC_SERRPENB | \ + ALTR_A10_ECC_DERRPENB) + +#define ALTR_A10_ECC_INTTEST_OFST 0x24 +#define ALTR_A10_ECC_TSERRA BIT(0) +#define ALTR_A10_ECC_TDERRA BIT(8) + +/* ECC Manager Defines */ +#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 +#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) + +#define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C +#define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 +#define A10_SYSMGR_ECC_INTSTAT_L2 BIT(0) +#define A10_SYSMGR_ECC_INTSTAT_OCRAM BIT(1) + +#define A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST 0xA8 +#define A10_SYSGMR_MPU_CLEAR_L2_ECC_SB BIT(15) +#define A10_SYSGMR_MPU_CLEAR_L2_ECC_MB BIT(31) + +/* Arria 10 L2 ECC Management Group Defines */ +#define ALTR_A10_L2_ECC_CTL_OFST 0x0 +#define ALTR_A10_L2_ECC_EN_CTL BIT(0) + +#define ALTR_A10_L2_ECC_STATUS 0xFFD060A4 +#define ALTR_A10_L2_ECC_STAT_OFST 0xA4 +#define ALTR_A10_L2_ECC_SERR_PEND BIT(0) +#define ALTR_A10_L2_ECC_MERR_PEND BIT(0) + +#define ALTR_A10_L2_ECC_CLR_OFST 0x4 +#define ALTR_A10_L2_ECC_SERR_CLR BIT(15) +#define ALTR_A10_L2_ECC_MERR_CLR BIT(31) + +#define ALTR_A10_L2_ECC_INJ_OFST ALTR_A10_L2_ECC_CTL_OFST +#define ALTR_A10_L2_ECC_CE_INJ_MASK 0x00000101 +#define ALTR_A10_L2_ECC_UE_INJ_MASK 0x00010101 + +/* Arria 10 OCRAM ECC Management Group Defines */ +#define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0)) + +struct altr_edac_device_dev; + +struct edac_device_prv_data { + int (*setup)(struct altr_edac_device_dev *device); + int ce_clear_mask; + int ue_clear_mask; + int irq_status_mask; + char dbgfs_name[20]; + void * (*alloc_mem)(size_t size, void **other); + void (*free_mem)(void *p, size_t size, void *other); + int ecc_enable_mask; + int ecc_en_ofst; + int ce_set_mask; + int ue_set_mask; + int set_err_ofst; + irqreturn_t (*ecc_irq_handler)(struct altr_edac_device_dev *dci, + bool sb); + int trig_alloc_sz; + const struct file_operations *inject_fops; +}; + +struct altr_edac_device_dev { + struct list_head next; + void __iomem *base; + int sb_irq; + int db_irq; + const struct edac_device_prv_data *data; + struct dentry *debugfs_dir; + char *edac_dev_name; + struct altr_arria10_edac *edac; + struct edac_device_ctl_info *edac_dev; + struct device ddev; + int edac_idx; +}; + +struct altr_arria10_edac { + struct device *dev; + struct regmap *ecc_mgr_map; + int sb_irq; + int db_irq; + struct list_head a10_ecc_devices; +}; + #endif /* #ifndef _ALTERA_EDAC_H */ diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d87a47547ba5..624e2f78339c 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -15,11 +15,6 @@ module_param(ecc_enable_override, int, 0644); static struct msr __percpu *msrs; -/* - * count successfully initialized driver instances for setup_pci_device() - */ -static atomic_t drv_instances = ATOMIC_INIT(0); - /* Per-node stuff */ static struct ecc_settings **ecc_stngs; @@ -1918,7 +1913,7 @@ static struct amd64_family_type family_types[] = { [K8_CPUS] = { .ctl_name = "K8", .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, - .f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC, + .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, .ops = { .early_channel_count = k8_early_channel_count, .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, @@ -1928,7 +1923,7 @@ static struct amd64_family_type family_types[] = { [F10_CPUS] = { .ctl_name = "F10h", .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, - .f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC, + .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1938,7 +1933,7 @@ static struct amd64_family_type family_types[] = { [F15_CPUS] = { .ctl_name = "F15h", .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1948,7 +1943,7 @@ static struct amd64_family_type family_types[] = { [F15_M30H_CPUS] = { .ctl_name = "F15h_M30h", .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1958,7 +1953,7 @@ static struct amd64_family_type family_types[] = { [F15_M60H_CPUS] = { .ctl_name = "F15h_M60h", .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1968,7 +1963,7 @@ static struct amd64_family_type family_types[] = { [F16_CPUS] = { .ctl_name = "F16h", .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -1978,7 +1973,7 @@ static struct amd64_family_type family_types[] = { [F16_M30H_CPUS] = { .ctl_name = "F16h_M30h", .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, - .f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3, + .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2227,13 +2222,13 @@ static inline void decode_bus_error(int node_id, struct mce *m) } /* - * Use pvt->F2 which contains the F2 CPU PCI device to get the related - * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error. + * Use pvt->F3 which contains the F3 CPU PCI device to get the related + * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error. */ -static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) +static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f2_id) { /* Reserve the ADDRESS MAP Device */ - pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2); + pvt->F1 = pci_get_related_function(pvt->F3->vendor, f1_id, pvt->F3); if (!pvt->F1) { amd64_err("error address map device not found: " "vendor %x device 0x%x (broken BIOS?)\n", @@ -2241,15 +2236,15 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) return -ENODEV; } - /* Reserve the MISC Device */ - pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2); - if (!pvt->F3) { + /* Reserve the DCT Device */ + pvt->F2 = pci_get_related_function(pvt->F3->vendor, f2_id, pvt->F3); + if (!pvt->F2) { pci_dev_put(pvt->F1); pvt->F1 = NULL; - amd64_err("error F3 device not found: " + amd64_err("error F2 device not found: " "vendor %x device 0x%x (broken BIOS?)\n", - PCI_VENDOR_ID_AMD, f3_id); + PCI_VENDOR_ID_AMD, f2_id); return -ENODEV; } @@ -2263,7 +2258,7 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id) static void free_mc_sibling_devs(struct amd64_pvt *pvt) { pci_dev_put(pvt->F1); - pci_dev_put(pvt->F3); + pci_dev_put(pvt->F2); } /* @@ -2778,14 +2773,14 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; -static int init_one_instance(struct pci_dev *F2) +static int init_one_instance(unsigned int nid) { - struct amd64_pvt *pvt = NULL; + struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct amd64_family_type *fam_type = NULL; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; + struct amd64_pvt *pvt = NULL; int err = 0, ret; - u16 nid = amd_pci_dev_to_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2793,7 +2788,7 @@ static int init_one_instance(struct pci_dev *F2) goto err_ret; pvt->mc_node_id = nid; - pvt->F2 = F2; + pvt->F3 = F3; ret = -EINVAL; fam_type = per_family_init(pvt); @@ -2801,7 +2796,7 @@ static int init_one_instance(struct pci_dev *F2) goto err_free; ret = -ENODEV; - err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id); + err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f2_id); if (err) goto err_free; @@ -2836,7 +2831,7 @@ static int init_one_instance(struct pci_dev *F2) goto err_siblings; mci->pvt_info = pvt; - mci->pdev = &pvt->F2->dev; + mci->pdev = &pvt->F3->dev; setup_mci_misc_attrs(mci, fam_type); @@ -2855,8 +2850,6 @@ static int init_one_instance(struct pci_dev *F2) amd_register_ecc_decoder(decode_bus_error); - atomic_inc(&drv_instances); - return 0; err_add_mc: @@ -2872,19 +2865,11 @@ err_ret: return ret; } -static int probe_one_instance(struct pci_dev *pdev, - const struct pci_device_id *mc_type) +static int probe_one_instance(unsigned int nid) { - u16 nid = amd_pci_dev_to_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; - int ret = 0; - - ret = pci_enable_device(pdev); - if (ret < 0) { - edac_dbg(0, "ret=%d\n", ret); - return -EIO; - } + int ret; ret = -ENOMEM; s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); @@ -2905,7 +2890,7 @@ static int probe_one_instance(struct pci_dev *pdev, goto err_enable; } - ret = init_one_instance(pdev); + ret = init_one_instance(nid); if (ret < 0) { amd64_err("Error probing instance: %d\n", nid); restore_ecc_error_reporting(s, nid, F3); @@ -2921,19 +2906,18 @@ err_out: return ret; } -static void remove_one_instance(struct pci_dev *pdev) +static void remove_one_instance(unsigned int nid) { - struct mem_ctl_info *mci; - struct amd64_pvt *pvt; - u16 nid = amd_pci_dev_to_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; + struct mem_ctl_info *mci; + struct amd64_pvt *pvt; - mci = find_mci_by_dev(&pdev->dev); + mci = find_mci_by_dev(&F3->dev); WARN_ON(!mci); /* Remove from EDAC CORE tracking list */ - mci = edac_mc_del_mc(&pdev->dev); + mci = edac_mc_del_mc(&F3->dev); if (!mci) return; @@ -2957,31 +2941,6 @@ static void remove_one_instance(struct pci_dev *pdev) edac_mc_free(mci); } -/* - * This table is part of the interface for loading drivers for PCI devices. The - * PCI core identifies what devices are on a system during boot, and then - * inquiry this table to see if this driver is for a given device found. - */ -static const struct pci_device_id amd64_pci_table[] = { - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) }, - {0, } -}; -MODULE_DEVICE_TABLE(pci, amd64_pci_table); - -static struct pci_driver amd64_pci_driver = { - .name = EDAC_MOD_STR, - .probe = probe_one_instance, - .remove = remove_one_instance, - .id_table = amd64_pci_table, - .driver.probe_type = PROBE_FORCE_SYNCHRONOUS, -}; - static void setup_pci_device(void) { struct mem_ctl_info *mci; @@ -3005,8 +2964,7 @@ static void setup_pci_device(void) static int __init amd64_edac_init(void) { int err = -ENODEV; - - printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION); + int i; opstate_init(); @@ -3022,13 +2980,14 @@ static int __init amd64_edac_init(void) if (!msrs) goto err_free; - err = pci_register_driver(&amd64_pci_driver); - if (err) - goto err_pci; + for (i = 0; i < amd_nb_num(); i++) + if (probe_one_instance(i)) { + /* unwind properly */ + while (--i >= 0) + remove_one_instance(i); - err = -ENODEV; - if (!atomic_read(&drv_instances)) - goto err_no_instances; + goto err_pci; + } setup_pci_device(); @@ -3036,10 +2995,9 @@ static int __init amd64_edac_init(void) amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR); #endif - return 0; + printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION); -err_no_instances: - pci_unregister_driver(&amd64_pci_driver); + return 0; err_pci: msrs_free(msrs); @@ -3055,10 +3013,13 @@ err_ret: static void __exit amd64_edac_exit(void) { + int i; + if (pci_ctl) edac_pci_release_generic_ctl(pci_ctl); - pci_unregister_driver(&amd64_pci_driver); + for (i = 0; i < amd_nb_num(); i++) + remove_one_instance(i); kfree(ecc_stngs); ecc_stngs = NULL; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index c0f248f3aaf9..c08870479054 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -422,7 +422,7 @@ struct low_ops { struct amd64_family_type { const char *ctl_name; - u16 f1_id, f3_id; + u16 f1_id, f2_id; struct low_ops ops; }; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 1472f48c8ac6..6aa256b0a1ed 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -923,7 +923,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci, mci->ue_mc += count; if (!enable_per_layer_report) { - mci->ce_noinfo_count += count; + mci->ue_noinfo_count += count; return; } diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 26e65ab5932a..10c305b4a2e1 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -998,11 +998,12 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) void edac_unregister_sysfs(struct mem_ctl_info *mci) { + struct bus_type *bus = mci->bus; const char *name = mci->bus->name; edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); - bus_unregister(mci->bus); + bus_unregister(bus); kfree(name); } diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 792bdae2b91d..8a68a5e943ea 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -271,16 +271,6 @@ struct i7core_pvt { bool is_registered, enable_scrub; - /* Fifo double buffers */ - struct mce mce_entry[MCE_LOG_LEN]; - struct mce mce_outentry[MCE_LOG_LEN]; - - /* Fifo in/out counters */ - unsigned mce_in, mce_out; - - /* Count indicator to show errors not got */ - unsigned mce_overrun; - /* DCLK Frequency used for computing scrub rate */ int dclk_freq; @@ -1792,56 +1782,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, * i7core_check_error Retrieve and process errors reported by the * hardware. Called by the Core module. */ -static void i7core_check_error(struct mem_ctl_info *mci) +static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m) { struct i7core_pvt *pvt = mci->pvt_info; - int i; - unsigned count = 0; - struct mce *m; - /* - * MCE first step: Copy all mce errors into a temporary buffer - * We use a double buffering here, to reduce the risk of - * losing an error. - */ - smp_rmb(); - count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) - % MCE_LOG_LEN; - if (!count) - goto check_ce_error; - - m = pvt->mce_outentry; - if (pvt->mce_in + count > MCE_LOG_LEN) { - unsigned l = MCE_LOG_LEN - pvt->mce_in; - - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); - smp_wmb(); - pvt->mce_in = 0; - count -= l; - m += l; - } - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); - smp_wmb(); - pvt->mce_in += count; - - smp_rmb(); - if (pvt->mce_overrun) { - i7core_printk(KERN_ERR, "Lost %d memory errors\n", - pvt->mce_overrun); - smp_wmb(); - pvt->mce_overrun = 0; - } - - /* - * MCE second step: parse errors and display - */ - for (i = 0; i < count; i++) - i7core_mce_output_error(mci, &pvt->mce_outentry[i]); + i7core_mce_output_error(mci, m); /* * Now, let's increment CE error counts */ -check_ce_error: if (!pvt->is_registered) i7core_udimm_check_mc_ecc_err(mci); else @@ -1849,12 +1798,8 @@ check_ce_error: } /* - * i7core_mce_check_error Replicates mcelog routine to get errors - * This routine simply queues mcelog errors, and - * return. The error itself should be handled later - * by i7core_check_error. - * WARNING: As this routine should be called at NMI time, extra care should - * be taken to avoid deadlocks, and to be as fast as possible. + * Check that logging is enabled and that this is the right type + * of error for us to handle. */ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) @@ -1882,21 +1827,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->bank != 8) return NOTIFY_DONE; - smp_rmb(); - if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { - smp_wmb(); - pvt->mce_overrun++; - return NOTIFY_DONE; - } - - /* Copy memory error at the ringbuffer */ - memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); - smp_wmb(); - pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; - - /* Handle fatal errors immediately */ - if (mce->mcgstatus & 1) - i7core_check_error(mci); + i7core_check_error(mci, mce); /* Advise mcelog that the errors were handled */ return NOTIFY_STOP; @@ -2243,8 +2174,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev) get_dimm_config(mci); /* record ptr to the generic device */ mci->pdev = &i7core_dev->pdev[0]->dev; - /* Set the function pointer to an actual operation function */ - mci->edac_check = i7core_check_error; /* Enable scrubrate setting */ if (pvt->enable_scrub) diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c index 18d77ace4813..1c88d9707495 100644 --- a/drivers/edac/ie31200_edac.c +++ b/drivers/edac/ie31200_edac.c @@ -17,6 +17,7 @@ * 015c: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller * 0c08: Xeon E3-1200 v3 Processor DRAM Controller + * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers * * Based on Intel specification: * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf @@ -55,6 +56,7 @@ #define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c #define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04 #define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08 +#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918 #define IE31200_DIMMS 4 #define IE31200_RANKS 8 @@ -105,8 +107,11 @@ * 1 Multiple Bit Error Status (MERRSTS) * 0 Correctable Error Status (CERRSTS) */ + #define IE31200_C0ECCERRLOG 0x40c8 #define IE31200_C1ECCERRLOG 0x44c8 +#define IE31200_C0ECCERRLOG_SKL 0x4048 +#define IE31200_C1ECCERRLOG_SKL 0x4448 #define IE31200_ECCERRLOG_CE BIT(0) #define IE31200_ECCERRLOG_UE BIT(1) #define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27) @@ -123,17 +128,28 @@ #define IE31200_CAPID0_DDPCD BIT(6) #define IE31200_CAPID0_ECC BIT(1) -#define IE31200_MAD_DIMM_0_OFFSET 0x5004 -#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) -#define IE31200_MAD_DIMM_A_RANK BIT(17) -#define IE31200_MAD_DIMM_A_WIDTH BIT(19) - -#define IE31200_PAGES(n) (n << (28 - PAGE_SHIFT)) +#define IE31200_MAD_DIMM_0_OFFSET 0x5004 +#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C +#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0) +#define IE31200_MAD_DIMM_A_RANK BIT(17) +#define IE31200_MAD_DIMM_A_RANK_SHIFT 17 +#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10) +#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10 +#define IE31200_MAD_DIMM_A_WIDTH BIT(19) +#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19 +#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8) +#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8 + +/* Skylake reports 1GB increments, everything else is 256MB */ +#define IE31200_PAGES(n, skl) \ + (n << (28 + (2 * skl) - PAGE_SHIFT)) static int nr_channels; struct ie31200_priv { void __iomem *window; + void __iomem *c0errlog; + void __iomem *c1errlog; }; enum ie31200_chips { @@ -157,9 +173,9 @@ static const struct ie31200_dev_info ie31200_devs[] = { }; struct dimm_data { - u8 size; /* in 256MB multiples */ + u8 size; /* in multiples of 256MB, except Skylake is 1GB */ u8 dual_rank : 1, - x16_width : 1; /* 0 means x8 width */ + x16_width : 2; /* 0 means x8 width */ }; static int how_many_channels(struct pci_dev *pdev) @@ -197,11 +213,10 @@ static bool ecc_capable(struct pci_dev *pdev) return true; } -static int eccerrlog_row(int channel, u64 log) +static int eccerrlog_row(u64 log) { - int rank = ((log & IE31200_ECCERRLOG_RANK_BITS) >> - IE31200_ECCERRLOG_RANK_SHIFT); - return rank | (channel * IE31200_RANKS_PER_CHANNEL); + return ((log & IE31200_ECCERRLOG_RANK_BITS) >> + IE31200_ECCERRLOG_RANK_SHIFT); } static void ie31200_clear_error_info(struct mem_ctl_info *mci) @@ -219,7 +234,6 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, { struct pci_dev *pdev; struct ie31200_priv *priv = mci->pvt_info; - void __iomem *window = priv->window; pdev = to_pci_dev(mci->pdev); @@ -232,9 +246,9 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, if (!(info->errsts & IE31200_ERRSTS_BITS)) return; - info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG); + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); if (nr_channels == 2) - info->eccerrlog[1] = lo_hi_readq(window + IE31200_C1ECCERRLOG); + info->eccerrlog[1] = lo_hi_readq(priv->c1errlog); pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts2); @@ -245,10 +259,10 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci, * should be UE info. */ if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) { - info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG); + info->eccerrlog[0] = lo_hi_readq(priv->c0errlog); if (nr_channels == 2) info->eccerrlog[1] = - lo_hi_readq(window + IE31200_C1ECCERRLOG); + lo_hi_readq(priv->c1errlog); } ie31200_clear_error_info(mci); @@ -274,14 +288,14 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci, if (log & IE31200_ECCERRLOG_UE) { edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, - eccerrlog_row(channel, log), + eccerrlog_row(log), channel, -1, "ie31200 UE", ""); } else if (log & IE31200_ECCERRLOG_CE) { edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, IE31200_ECCERRLOG_SYNDROME(log), - eccerrlog_row(channel, log), + eccerrlog_row(log), channel, -1, "ie31200 CE", ""); } @@ -326,6 +340,33 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev) return window; } +static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode, + int chan) +{ + dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE; + dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0; + dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >> + (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4))); +} + +static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode, + int chan) +{ + dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE; + dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0; + dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0; +} + +static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan, + bool skl) +{ + if (skl) + __skl_populate_dimm_info(dd, addr_decode, chan); + else + __populate_dimm_info(dd, addr_decode, chan); +} + + static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) { int i, j, ret; @@ -334,7 +375,8 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL]; void __iomem *window; struct ie31200_priv *priv; - u32 addr_decode; + u32 addr_decode, mad_offset; + bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8); edac_dbg(0, "MC:\n"); @@ -363,7 +405,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) edac_dbg(3, "MC: init mci\n"); mci->pdev = &pdev->dev; - mci->mtype_cap = MEM_FLAG_DDR3; + if (skl) + mci->mtype_cap = MEM_FLAG_DDR4; + else + mci->mtype_cap = MEM_FLAG_DDR3; mci->edac_ctl_cap = EDAC_FLAG_SECDED; mci->edac_cap = EDAC_FLAG_SECDED; mci->mod_name = EDAC_MOD_STR; @@ -374,19 +419,24 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) mci->ctl_page_to_phys = NULL; priv = mci->pvt_info; priv->window = window; + if (skl) { + priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL; + priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL; + mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL; + } else { + priv->c0errlog = window + IE31200_C0ECCERRLOG; + priv->c1errlog = window + IE31200_C1ECCERRLOG; + mad_offset = IE31200_MAD_DIMM_0_OFFSET; + } /* populate DIMM info */ for (i = 0; i < IE31200_CHANNELS; i++) { - addr_decode = readl(window + IE31200_MAD_DIMM_0_OFFSET + + addr_decode = readl(window + mad_offset + (i * 4)); edac_dbg(0, "addr_decode: 0x%x\n", addr_decode); for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) { - dimm_info[i][j].size = (addr_decode >> (j * 8)) & - IE31200_MAD_DIMM_SIZE; - dimm_info[i][j].dual_rank = (addr_decode & - (IE31200_MAD_DIMM_A_RANK << j)) ? 1 : 0; - dimm_info[i][j].x16_width = (addr_decode & - (IE31200_MAD_DIMM_A_WIDTH << j)) ? 1 : 0; + populate_dimm_info(&dimm_info[i][j], addr_decode, j, + skl); edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n", dimm_info[i][j].size, dimm_info[i][j].dual_rank, @@ -405,7 +455,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) struct dimm_info *dimm; unsigned long nr_pages; - nr_pages = IE31200_PAGES(dimm_info[j][i].size); + nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl); if (nr_pages == 0) continue; @@ -417,7 +467,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* just a guess */ - dimm->mtype = MEM_DDR3; + if (skl) + dimm->mtype = MEM_DDR4; + else + dimm->mtype = MEM_DDR3; dimm->dtype = DEV_UNKNOWN; dimm->edac_mode = EDAC_UNKNOWN; } @@ -426,7 +479,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx) dimm->nr_pages = nr_pages; edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages); dimm->grain = 8; /* same guess */ - dimm->mtype = MEM_DDR3; + if (skl) + dimm->mtype = MEM_DDR4; + else + dimm->mtype = MEM_DDR3; dimm->dtype = DEV_UNKNOWN; dimm->edac_mode = EDAC_UNKNOWN; } @@ -501,6 +557,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200}, { + PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + IE31200}, + { 0, } /* 0 terminated list. */ }; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 8bf745d2da7e..b4d0bf6534cf 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -21,6 +21,8 @@ #include <linux/smp.h> #include <linux/bitmap.h> #include <linux/math64.h> +#include <linux/mod_devicetable.h> +#include <asm/cpu_device_id.h> #include <asm/processor.h> #include <asm/mce.h> @@ -28,8 +30,6 @@ /* Static vars */ static LIST_HEAD(sbridge_edac_list); -static DEFINE_MUTEX(sbridge_edac_lock); -static int probed; /* * Alter this version for the module when modifications are made @@ -364,16 +364,6 @@ struct sbridge_pvt { bool is_mirrored, is_lockstep, is_close_pg; bool is_chan_hash; - /* Fifo double buffers */ - struct mce mce_entry[MCE_LOG_LEN]; - struct mce mce_outentry[MCE_LOG_LEN]; - - /* Fifo in/out counters */ - unsigned mce_in, mce_out; - - /* Count indicator to show errors not got */ - unsigned mce_overrun; - /* Memory description */ u64 tolm, tohm; struct knl_pvt knl; @@ -662,18 +652,6 @@ static const struct pci_id_table pci_dev_descr_broadwell_table[] = { {0,} /* 0 terminated list. */ }; -/* - * pci_device_id table for which devices we are looking for - */ -static const struct pci_device_id sbridge_pci_tbl[] = { - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0)}, - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)}, - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)}, - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)}, - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0)}, - {0,} /* 0 terminated list. */ -}; - /**************************************************************************** Ancillary status routines @@ -3097,63 +3075,8 @@ err_parsing: } /* - * sbridge_check_error Retrieve and process errors reported by the - * hardware. Called by the Core module. - */ -static void sbridge_check_error(struct mem_ctl_info *mci) -{ - struct sbridge_pvt *pvt = mci->pvt_info; - int i; - unsigned count = 0; - struct mce *m; - - /* - * MCE first step: Copy all mce errors into a temporary buffer - * We use a double buffering here, to reduce the risk of - * loosing an error. - */ - smp_rmb(); - count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in) - % MCE_LOG_LEN; - if (!count) - return; - - m = pvt->mce_outentry; - if (pvt->mce_in + count > MCE_LOG_LEN) { - unsigned l = MCE_LOG_LEN - pvt->mce_in; - - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l); - smp_wmb(); - pvt->mce_in = 0; - count -= l; - m += l; - } - memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count); - smp_wmb(); - pvt->mce_in += count; - - smp_rmb(); - if (pvt->mce_overrun) { - sbridge_printk(KERN_ERR, "Lost %d memory errors\n", - pvt->mce_overrun); - smp_wmb(); - pvt->mce_overrun = 0; - } - - /* - * MCE second step: parse errors and display - */ - for (i = 0; i < count; i++) - sbridge_mce_output_error(mci, &pvt->mce_outentry[i]); -} - -/* - * sbridge_mce_check_error Replicates mcelog routine to get errors - * This routine simply queues mcelog errors, and - * return. The error itself should be handled later - * by sbridge_check_error. - * WARNING: As this routine should be called at NMI time, extra care should - * be taken to avoid deadlocks, and to be as fast as possible. + * Check that logging is enabled and that this is the right type + * of error for us to handle. */ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) @@ -3198,21 +3121,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, "%u APIC %x\n", mce->cpuvendor, mce->cpuid, mce->time, mce->socketid, mce->apicid); - smp_rmb(); - if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) { - smp_wmb(); - pvt->mce_overrun++; - return NOTIFY_DONE; - } - - /* Copy memory error at the ringbuffer */ - memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce)); - smp_wmb(); - pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN; - - /* Handle fatal errors immediately */ - if (mce->mcgstatus & 1) - sbridge_check_error(mci); + sbridge_mce_output_error(mci, mce); /* Advice mcelog that the error were handled */ return NOTIFY_STOP; @@ -3298,9 +3207,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) mci->dev_name = pci_name(pdev); mci->ctl_page_to_phys = NULL; - /* Set the function pointer to an actual operation function */ - mci->edac_check = sbridge_check_error; - pvt->info.type = type; switch (type) { case IVY_BRIDGE: @@ -3448,62 +3354,40 @@ fail0: return rc; } +#define ICPU(model, table) \ + { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table } + +/* Order here must match "enum type" */ +static const struct x86_cpu_id sbridge_cpuids[] = { + ICPU(0x2d, pci_dev_descr_sbridge_table), /* SANDY_BRIDGE */ + ICPU(0x3e, pci_dev_descr_ibridge_table), /* IVY_BRIDGE */ + ICPU(0x3f, pci_dev_descr_haswell_table), /* HASWELL */ + ICPU(0x4f, pci_dev_descr_broadwell_table), /* BROADWELL */ + ICPU(0x57, pci_dev_descr_knl_table), /* KNIGHTS_LANDING */ + { } +}; +MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); + /* - * sbridge_probe Probe for ONE instance of device to see if it is + * sbridge_probe Get all devices and register memory controllers * present. * return: * 0 for FOUND a device * < 0 for error code */ -static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static int sbridge_probe(const struct x86_cpu_id *id) { int rc = -ENODEV; u8 mc, num_mc = 0; struct sbridge_dev *sbridge_dev; - enum type type = SANDY_BRIDGE; + struct pci_id_table *ptable = (struct pci_id_table *)id->driver_data; /* get the pci devices we want to reserve for our use */ - mutex_lock(&sbridge_edac_lock); - - /* - * All memory controllers are allocated at the first pass. - */ - if (unlikely(probed >= 1)) { - mutex_unlock(&sbridge_edac_lock); - return -ENODEV; - } - probed++; + rc = sbridge_get_all_devices(&num_mc, ptable); - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA: - rc = sbridge_get_all_devices(&num_mc, - pci_dev_descr_ibridge_table); - type = IVY_BRIDGE; - break; - case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0: - rc = sbridge_get_all_devices(&num_mc, - pci_dev_descr_sbridge_table); - type = SANDY_BRIDGE; - break; - case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0: - rc = sbridge_get_all_devices(&num_mc, - pci_dev_descr_haswell_table); - type = HASWELL; - break; - case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0: - rc = sbridge_get_all_devices(&num_mc, - pci_dev_descr_broadwell_table); - type = BROADWELL; - break; - case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0: - rc = sbridge_get_all_devices_knl(&num_mc, - pci_dev_descr_knl_table); - type = KNIGHTS_LANDING; - break; - } if (unlikely(rc < 0)) { - edac_dbg(0, "couldn't get all devices for 0x%x\n", pdev->device); + edac_dbg(0, "couldn't get all devices\n"); goto fail0; } @@ -3514,14 +3398,13 @@ static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id) mc, mc + 1, num_mc); sbridge_dev->mc = mc++; - rc = sbridge_register_mci(sbridge_dev, type); + rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids); if (unlikely(rc < 0)) goto fail1; } sbridge_printk(KERN_INFO, "%s\n", SBRIDGE_REVISION); - mutex_unlock(&sbridge_edac_lock); return 0; fail1: @@ -3530,74 +3413,47 @@ fail1: sbridge_put_all_devices(); fail0: - mutex_unlock(&sbridge_edac_lock); return rc; } /* - * sbridge_remove destructor for one instance of device + * sbridge_remove cleanup * */ -static void sbridge_remove(struct pci_dev *pdev) +static void sbridge_remove(void) { struct sbridge_dev *sbridge_dev; edac_dbg(0, "\n"); - /* - * we have a trouble here: pdev value for removal will be wrong, since - * it will point to the X58 register used to detect that the machine - * is a Nehalem or upper design. However, due to the way several PCI - * devices are grouped together to provide MC functionality, we need - * to use a different method for releasing the devices - */ - - mutex_lock(&sbridge_edac_lock); - - if (unlikely(!probed)) { - mutex_unlock(&sbridge_edac_lock); - return; - } - list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) sbridge_unregister_mci(sbridge_dev); /* Release PCI resources */ sbridge_put_all_devices(); - - probed--; - - mutex_unlock(&sbridge_edac_lock); } -MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl); - -/* - * sbridge_driver pci_driver structure for this module - * - */ -static struct pci_driver sbridge_driver = { - .name = "sbridge_edac", - .probe = sbridge_probe, - .remove = sbridge_remove, - .id_table = sbridge_pci_tbl, -}; - /* * sbridge_init Module entry function * Try to initialize this module for its devices */ static int __init sbridge_init(void) { - int pci_rc; + const struct x86_cpu_id *id; + int rc; edac_dbg(2, "\n"); + id = x86_match_cpu(sbridge_cpuids); + if (!id) + return -ENODEV; + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - pci_rc = pci_register_driver(&sbridge_driver); - if (pci_rc >= 0) { + rc = sbridge_probe(id); + + if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); if (get_edac_report_status() == EDAC_REPORTING_DISABLED) sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); @@ -3605,9 +3461,9 @@ static int __init sbridge_init(void) } sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n", - pci_rc); + rc); - return pci_rc; + return rc; } /* @@ -3617,7 +3473,7 @@ static int __init sbridge_init(void) static void __exit sbridge_exit(void) { edac_dbg(2, "\n"); - pci_unregister_driver(&sbridge_driver); + sbridge_remove(); mce_unregister_decode_chain(&sbridge_mce_dec); } |