diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-28 20:42:33 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-28 20:42:33 -0800 |
commit | ad6c2c2eb34f234d6253292b9b3c047614fbfe7e (patch) | |
tree | 8ceb00db9874c09f3002b5ca579f1f9146b30a28 /drivers/acpi/apei | |
parent | 19cc90f58d4f2538b4cf5371681a057d2e5209f2 (diff) | |
parent | b0769891ba7baa53f270dc70d71934748beb4c5b (diff) | |
download | linux-stable-ad6c2c2eb34f234d6253292b9b3c047614fbfe7e.tar.gz linux-stable-ad6c2c2eb34f234d6253292b9b3c047614fbfe7e.tar.bz2 linux-stable-ad6c2c2eb34f234d6253292b9b3c047614fbfe7e.zip |
Merge branch 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac
Pull EDAC fixes and ghes-edac from Mauro Carvalho Chehab:
"For:
- Some fixes at edac drivers (i7core_edac, sb_edac, i3200_edac);
- error injection support for i5100, when EDAC debug is enabled;
- fix edac when it is loaded builtin (early init for the subsystem);
- a "Firmware First" EDAC driver, allowing ghes to report errors via
EDAC (ghes-edac).
With regards to ghes-edac, this fixes a longstanding BZ at Red Hat
that happens with Nehalem and Sandy Bridge CPUs: when both GHES and
i7core_edac or sb_edac are running, the error reports are
unpredictable, as both BIOS and OS race to access the registers. With
ghes-edac, the EDAC core will refuse to register any other concurrent
memory error driver.
This patchset moves the ghes struct definitions to a separate header
file (include/acpi/ghes.h) and adds 3 hooks at apei/ghes.c to
register/unregister and to report errors via ghes-edac. Those changes
were acked by ghes driver maintainer (Huang)."
* 'linux_next' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac: (30 commits)
i5100_edac: convert to use simple_open()
ghes_edac: fix to use list_for_each_entry_safe() when delete list items
ghes_edac: Fix RAS tracing
ghes_edac: Make it compliant with UEFI spec 2.3.1
ghes_edac: Improve driver's printk messages
ghes_edac: Don't credit the same memory dimm twice
ghes_edac: do a better job of filling EDAC DIMM info
ghes_edac: add support for reporting errors via EDAC
ghes_edac: Register at EDAC core the BIOS report
ghes: add the needed hooks for EDAC error report
ghes: move structures/enum to a header file
edac: add support for error type "Info"
edac: add support for raw error reports
edac: reduce stack pressure by using a pre-allocated buffer
edac: lock module owner to avoid error report conflicts
edac: remove proc_name from mci structure
edac: add a new memory layer type
edac: initialize the core earlier
edac: better report error conditions in debug mode
i5100_edac: Remove two checkpatch warnings
...
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r-- | drivers/acpi/apei/ghes.c | 71 |
1 files changed, 21 insertions, 50 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7ae2750bb457..d668a8ae602b 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -48,8 +48,8 @@ #include <linux/genalloc.h> #include <linux/pci.h> #include <linux/aer.h> -#include <acpi/apei.h> -#include <acpi/hed.h> + +#include <acpi/ghes.h> #include <asm/mce.h> #include <asm/tlbflush.h> #include <asm/nmi.h> @@ -84,42 +84,6 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) -/* - * One struct ghes is created for each generic hardware error source. - * It provides the context for APEI hardware error timer/IRQ/SCI/NMI - * handler. - * - * estatus: memory buffer for error status block, allocated during - * HEST parsing. - */ -#define GHES_TO_CLEAR 0x0001 -#define GHES_EXITING 0x0002 - -struct ghes { - struct acpi_hest_generic *generic; - struct acpi_hest_generic_status *estatus; - u64 buffer_paddr; - unsigned long flags; - union { - struct list_head list; - struct timer_list timer; - unsigned int irq; - }; -}; - -struct ghes_estatus_node { - struct llist_node llnode; - struct acpi_hest_generic *generic; -}; - -struct ghes_estatus_cache { - u32 estatus_len; - atomic_t count; - struct acpi_hest_generic *generic; - unsigned long long time_in; - struct rcu_head rcu; -}; - bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -333,13 +297,6 @@ static void ghes_fini(struct ghes *ghes) apei_unmap_generic_address(&ghes->generic->error_status_address); } -enum { - GHES_SEV_NO = 0x0, - GHES_SEV_CORRECTED = 0x1, - GHES_SEV_RECOVERABLE = 0x2, - GHES_SEV_PANIC = 0x3, -}; - static inline int ghes_severity(int severity) { switch (severity) { @@ -452,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) +static void ghes_do_proc(struct ghes *ghes, + const struct acpi_hest_generic_status *estatus) { int sev, sec_sev; struct acpi_hest_generic_data *gdata; @@ -464,6 +422,8 @@ static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err; mem_err = (struct cper_sec_mem_err *)(gdata+1); + ghes_edac_report_mem_error(ghes, sev, mem_err); + #ifdef CONFIG_X86_MCE apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, mem_err); @@ -682,7 +642,7 @@ static int ghes_proc(struct ghes *ghes) if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } - ghes_do_proc(ghes->estatus); + ghes_do_proc(ghes, ghes->estatus); out: ghes_clear_estatus(ghes); return 0; @@ -775,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) estatus = GHES_ESTATUS_FROM_NODE(estatus_node); len = apei_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); - ghes_do_proc(estatus); + ghes_do_proc(estatus_node->ghes, estatus); if (!ghes_estatus_cached(estatus)) { generic = estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus)) @@ -864,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); if (estatus_node) { + estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); memcpy(estatus, ghes->estatus, len); @@ -942,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev) ghes = NULL; goto err; } + + rc = ghes_edac_register(ghes, &ghes_dev->dev); + if (rc < 0) + goto err; + switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED: ghes->timer.function = ghes_poll_func; @@ -954,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev) if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) { pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } if (request_irq(ghes->irq, ghes_irq_func, 0, "GHES IRQ", ghes)) { pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } break; case ACPI_HEST_NOTIFY_SCI: @@ -986,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev) platform_set_drvdata(ghes_dev, ghes); return 0; +err_edac_unreg: + ghes_edac_unregister(ghes); err: if (ghes) { ghes_fini(ghes); @@ -1038,6 +1006,9 @@ static int ghes_remove(struct platform_device *ghes_dev) } ghes_fini(ghes); + + ghes_edac_unregister(ghes); + kfree(ghes); platform_set_drvdata(ghes_dev, NULL); |