From 53f2d02898755d1b24bde1975e202815d29fdb81 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 23 Feb 2012 08:10:34 -0300
Subject: RAS: Add a tracepoint for reporting memory controller events

Add a new tracepoint-based hardware events report method for
reporting Memory Controller events.

Part of the description bellow is shamelessly copied from Tony
Luck's notes about the Hardware Error BoF during LPC 2010 [1].
Tony, thanks for your notes and discussions to generate the
h/w error reporting requirements.

[1] http://lwn.net/Articles/416669/

    We have several subsystems & methods for reporting hardware errors:

    1) EDAC ("Error Detection and Correction").  In its original form
    this consisted of a platform specific driver that read topology
    information and error counts from chipset registers and reported
    the results via a sysfs interface.

    2) mcelog - x86 specific decoding of machine check bank registers
    reporting in binary form via /dev/mcelog. Recent additions make use
    of the APEI extensions that were documented in version 4.0a of the
    ACPI specification to acquire more information about errors without
    having to rely reading chipset registers directly. A user level
    programs decodes into somewhat human readable format.

    3) drivers/edac/mce_amd.c - this driver hooks into the mcelog path and
    decodes errors reported via machine check bank registers in AMD
    processors to the console log using printk();

    Each of these mechanisms has a band of followers ... and none
    of them appear to meet all the needs of all users.

As part of a RAS subsystem, let's encapsulate the memory error hardware
events into a trace facility.

The tracepoint printk will be displayed like:

mc_event: [quant] (Corrected|Uncorrected|Fatal) error:[error msg] on [label] ([location] [edac_mc detail] [driver_detail]

Where:
       	[quant] is the quantity of errors
	[error msg] is the driver-specific error message
		    (e. g. "memory read", "bus error", ...);
	[location] is the location in terms of memory controller and
		   branch/channel/slot, channel/slot or csrow/channel;
	[label] is the memory stick label;
	[edac_mc detail] describes the address location of the error
			 and the syndrome;
	[driver detail] is driver-specifig error message details,
			when needed/provided (e. g. "area:DMA", ...)

For example:

mc_event: 1 Corrected error:memory read on memory stick DIMM_1A (mc:0 location:0:0:0 page:0x586b6e offset:0xa66 grain:32 syndrome:0x0 area:DMA)

Of course, any userspace tools meant to handle errors should not parse
the above data. They should, instead, use the binary fields provided by
the tracepoint, mapping them directly into their Management Information
Base.

NOTE: The original patch was providing an additional mechanism for
MCA-based trace events that also contained MCA error register data.
However, as no agreement was reached so far for the MCA-based trace
events, for now, let's add events only for memory errors.
A latter patch is planned to change the tracepoint, for those types
of event.

Cc: Aristeu Rozanski <arozansk@redhat.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 72 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 56 insertions(+), 16 deletions(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 10f375032e96..ce25750a83f9 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -27,12 +27,17 @@
 #include <linux/list.h>
 #include <linux/ctype.h>
 #include <linux/edac.h>
+#include <linux/bitops.h>
 #include <asm/uaccess.h>
 #include <asm/page.h>
 #include <asm/edac.h>
 #include "edac_core.h"
 #include "edac_module.h"
 
+#define CREATE_TRACE_POINTS
+#define TRACE_INCLUDE_PATH ../../include/ras
+#include <ras/ras_event.h>
+
 /* lock to memory controller's control array */
 static DEFINE_MUTEX(mem_ctls_mutex);
 static LIST_HEAD(mc_devices);
@@ -384,6 +389,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	 * which will perform kobj unregistration and the actual free
 	 * will occur during the kobject callback operation
 	 */
+
 	return mci;
 }
 EXPORT_SYMBOL_GPL(edac_mc_alloc);
@@ -902,19 +908,19 @@ static void edac_ce_error(struct mem_ctl_info *mci,
 			  const bool enable_per_layer_report,
 			  const unsigned long page_frame_number,
 			  const unsigned long offset_in_page,
-			  u32 grain)
+			  long grain)
 {
 	unsigned long remapped_page;
 
 	if (edac_mc_get_log_ce()) {
 		if (other_detail && *other_detail)
 			edac_mc_printk(mci, KERN_WARNING,
-				       "CE %s on %s (%s%s - %s)\n",
+				       "CE %s on %s (%s %s - %s)\n",
 				       msg, label, location,
 				       detail, other_detail);
 		else
 			edac_mc_printk(mci, KERN_WARNING,
-				       "CE %s on %s (%s%s)\n",
+				       "CE %s on %s (%s %s)\n",
 				       msg, label, location,
 				       detail);
 	}
@@ -953,12 +959,12 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 	if (edac_mc_get_log_ue()) {
 		if (other_detail && *other_detail)
 			edac_mc_printk(mci, KERN_WARNING,
-				       "UE %s on %s (%s%s - %s)\n",
+				       "UE %s on %s (%s %s - %s)\n",
 			               msg, label, location, detail,
 				       other_detail);
 		else
 			edac_mc_printk(mci, KERN_WARNING,
-				       "UE %s on %s (%s%s)\n",
+				       "UE %s on %s (%s %s)\n",
 			               msg, label, location, detail);
 	}
 
@@ -975,27 +981,50 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 }
 
 #define OTHER_LABEL " or "
+
+/**
+ * edac_mc_handle_error - reports a memory event to userspace
+ *
+ * @type:		severity of the error (CE/UE/Fatal)
+ * @mci:		a struct mem_ctl_info pointer
+ * @page_frame_number:	mem page where the error occurred
+ * @offset_in_page:	offset of the error inside the page
+ * @syndrome:		ECC syndrome
+ * @top_layer:		Memory layer[0] position
+ * @mid_layer:		Memory layer[1] position
+ * @low_layer:		Memory layer[2] position
+ * @msg:		Message meaningful to the end users that
+ *			explains the event
+ * @other_detail:	Technical details about the event that
+ *			may help hardware manufacturers and
+ *			EDAC developers to analyse the event
+ * @arch_log:		Architecture-specific struct that can
+ *			be used to add extended information to the
+ *			tracepoint, like dumping MCE registers.
+ */
 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  struct mem_ctl_info *mci,
 			  const unsigned long page_frame_number,
 			  const unsigned long offset_in_page,
 			  const unsigned long syndrome,
-			  const int layer0,
-			  const int layer1,
-			  const int layer2,
+			  const int top_layer,
+			  const int mid_layer,
+			  const int low_layer,
 			  const char *msg,
 			  const char *other_detail,
-			  const void *mcelog)
+			  const void *arch_log)
 {
 	/* FIXME: too much for stack: move it to some pre-alocated area */
 	char detail[80], location[80];
 	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
 	char *p;
 	int row = -1, chan = -1;
-	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
+	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
 	int i;
-	u32 grain;
+	long grain;
 	bool enable_per_layer_report = false;
+	u16 error_count;	/* FIXME: make it a parameter */
+	u8 grain_bits;
 
 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 
@@ -1045,11 +1074,11 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	for (i = 0; i < mci->tot_dimms; i++) {
 		struct dimm_info *dimm = &mci->dimms[i];
 
-		if (layer0 >= 0 && layer0 != dimm->location[0])
+		if (top_layer >= 0 && top_layer != dimm->location[0])
 			continue;
-		if (layer1 >= 0 && layer1 != dimm->location[1])
+		if (mid_layer >= 0 && mid_layer != dimm->location[1])
 			continue;
-		if (layer2 >= 0 && layer2 != dimm->location[2])
+		if (low_layer >= 0 && low_layer != dimm->location[2])
 			continue;
 
 		/* get the max grain, over the error match range */
@@ -1120,11 +1149,22 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			     edac_layer_name[mci->layers[i].type],
 			     pos[i]);
 	}
+	if (p > location)
+		*(p - 1) = '\0';
+
+	/* Report the error via the trace interface */
+
+	error_count = 1;	/* FIXME: allow change it */
+	grain_bits = fls_long(grain) + 1;
+	trace_mc_event(type, msg, label, error_count,
+		       mci->mc_idx, top_layer, mid_layer, low_layer,
+		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
+		       grain_bits, syndrome, other_detail);
 
 	/* Memory type dependent details about the error */
 	if (type == HW_EVENT_ERR_CORRECTED) {
 		snprintf(detail, sizeof(detail),
-			"page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
+			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
 			page_frame_number, offset_in_page,
 			grain, syndrome);
 		edac_ce_error(mci, pos, msg, location, label, detail,
@@ -1132,7 +1172,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			      page_frame_number, offset_in_page, grain);
 	} else {
 		snprintf(detail, sizeof(detail),
-			"page:0x%lx offset:0x%lx grain:%d",
+			"page:0x%lx offset:0x%lx grain:%ld",
 			page_frame_number, offset_in_page, grain);
 
 		edac_ue_error(mci, pos, msg, location, label, detail,
-- 
cgit v1.2.3


From fd687502dc8037aa5a4b84c570ada971106574ee Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 16 Mar 2012 07:44:18 -0300
Subject: edac: Rename the parent dev to pdev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As EDAC doesn't use struct device itself, it created a parent dev
pointer called as "pdev".  Now that we'll be converting it to use
struct device, instead of struct devsys, this needs to be fixed.

No functional changes.

Reviewed-by: Aristeu Rozanski <arozansk@redhat.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Cc: Mark Gross <mark.gross@intel.com>
Cc: Jason Uhlenkott <juhlenko@akamai.com>
Cc: Tim Small <tim@buttersideup.com>
Cc: Ranganathan Desikan <ravi@jetztechnologies.com>
Cc: "Arvind R." <arvino55@gmail.com>
Cc: Olof Johansson <olof@lixom.net>
Cc: Egor Martovetsky <egor@pasemi.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Joe Perches <joe@perches.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hitoshi Mitake <h.mitake@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Niklas Söderlund" <niklas.soderlund@ericsson.com>
Cc: Shaohui Xie <Shaohui.Xie@freescale.com>
Cc: Josh Boyer <jwboyer@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index ce25750a83f9..811f09a38f3a 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -93,7 +93,7 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci)
 		mci->nr_csrows, mci->csrows);
 	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
 		mci->tot_dimms, mci->dimms);
-	debugf3("\tdev = %p\n", mci->dev);
+	debugf3("\tdev = %p\n", mci->pdev);
 	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
 	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
 }
@@ -428,7 +428,7 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 	list_for_each(item, &mc_devices) {
 		mci = list_entry(item, struct mem_ctl_info, link);
 
-		if (mci->dev == dev)
+		if (mci->pdev == dev)
 			return mci;
 	}
 
@@ -580,7 +580,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
 
 	insert_before = &mc_devices;
 
-	p = find_mci_by_dev(mci->dev);
+	p = find_mci_by_dev(mci->pdev);
 	if (unlikely(p != NULL))
 		goto fail0;
 
@@ -602,7 +602,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
 
 fail0:
 	edac_printk(KERN_WARNING, EDAC_MC,
-		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
+		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
 		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
 	return 1;
 
-- 
cgit v1.2.3


From 7a623c039075e4ea21648d88133fafa6dcfd113d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 16 Apr 2012 16:41:11 -0300
Subject: edac: rewrite the sysfs code to use struct device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The EDAC subsystem uses the old struct sysdev approach,
creating all nodes using the raw sysfs API. This is bad,
as the API is deprecated.

As we'll be changing the EDAC API, let's first port the existing
code to struct device.

There's one drawback on this patch: driver-specific sysfs
nodes, used by mpc85xx_edac, amd64_edac and i7core_edac
 won't be created anymore. While it would be possible to
also port the device-specific code, that would mix kobj with
struct device, with is not recommended. Also, it is easier and nicer
to move the code to the drivers, instead, as the core can get rid
of some complex logic that just emulates what the device_add()
and device_create_file() already does.

The next patches will convert the driver-specific code to use
the device-specific calls. Then, the remaining bits of the old
sysfs API will be removed.

NOTE: a per-MC bus is required, otherwise devices with more than
one memory controller will hit a bug like the one below:

[  819.094946] EDAC DEBUG: find_mci_by_dev: find_mci_by_dev()
[  819.094948] EDAC DEBUG: edac_create_sysfs_mci_device: edac_create_sysfs_mci_device() idx=1
[  819.094952] EDAC DEBUG: edac_create_sysfs_mci_device: edac_create_sysfs_mci_device(): creating device mc1
[  819.094967] EDAC DEBUG: edac_create_sysfs_mci_device: edac_create_sysfs_mci_device creating dimm0, located at channel 0 slot 0
[  819.094984] ------------[ cut here ]------------
[  819.100142] WARNING: at fs/sysfs/dir.c:481 sysfs_add_one+0xc1/0xf0()
[  819.107282] Hardware name: S2600CP
[  819.111078] sysfs: cannot create duplicate filename '/bus/edac/devices/dimm0'
[  819.119062] Modules linked in: sb_edac(+) edac_core ip6table_filter ip6_tables ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables bridge stp llc sunrpc binfmt_misc dm_mirror dm_region_hash dm_log vhost_net macvtap macvlan tun kvm microcode pcspkr iTCO_wdt iTCO_vendor_support igb i2c_i801 i2c_core sg ioatdma dca sr_mod cdrom sd_mod crc_t10dif ahci libahci isci libsas libata scsi_transport_sas scsi_mod wmi dm_mod [last unloaded: scsi_wait_scan]
[  819.175748] Pid: 10902, comm: modprobe Not tainted 3.3.0-0.11.el7.v12.2.x86_64 #1
[  819.184113] Call Trace:
[  819.186868]  [<ffffffff8105adaf>] warn_slowpath_common+0x7f/0xc0
[  819.193573]  [<ffffffff8105aea6>] warn_slowpath_fmt+0x46/0x50
[  819.200000]  [<ffffffff811f53d1>] sysfs_add_one+0xc1/0xf0
[  819.206025]  [<ffffffff811f5cf5>] sysfs_do_create_link+0x135/0x220
[  819.212944]  [<ffffffff811f7023>] ? sysfs_create_group+0x13/0x20
[  819.219656]  [<ffffffff811f5df3>] sysfs_create_link+0x13/0x20
[  819.226109]  [<ffffffff813b04f6>] bus_add_device+0xe6/0x1b0
[  819.232350]  [<ffffffff813ae7cb>] device_add+0x2db/0x460
[  819.238300]  [<ffffffffa0325634>] edac_create_dimm_object+0x84/0xf0 [edac_core]
[  819.246460]  [<ffffffffa0325e18>] edac_create_sysfs_mci_device+0xe8/0x290 [edac_core]
[  819.255215]  [<ffffffffa0322e2a>] edac_mc_add_mc+0x5a/0x2c0 [edac_core]
[  819.262611]  [<ffffffffa03412df>] sbridge_register_mci+0x1bc/0x279 [sb_edac]
[  819.270493]  [<ffffffffa03417a3>] sbridge_probe+0xef/0x175 [sb_edac]
[  819.277630]  [<ffffffff813ba4e8>] ? pm_runtime_enable+0x58/0x90
[  819.284268]  [<ffffffff812f430c>] local_pci_probe+0x5c/0xd0
[  819.290508]  [<ffffffff812f5ba1>] __pci_device_probe+0xf1/0x100
[  819.297117]  [<ffffffff812f5bea>] pci_device_probe+0x3a/0x60
[  819.303457]  [<ffffffff813b1003>] really_probe+0x73/0x270
[  819.309496]  [<ffffffff813b138e>] driver_probe_device+0x4e/0xb0
[  819.316104]  [<ffffffff813b149b>] __driver_attach+0xab/0xb0
[  819.322337]  [<ffffffff813b13f0>] ? driver_probe_device+0xb0/0xb0
[  819.329151]  [<ffffffff813af5d6>] bus_for_each_dev+0x56/0x90
[  819.335489]  [<ffffffff813b0d7e>] driver_attach+0x1e/0x20
[  819.341534]  [<ffffffff813b0980>] bus_add_driver+0x1b0/0x2a0
[  819.347884]  [<ffffffffa0347000>] ? 0xffffffffa0346fff
[  819.353641]  [<ffffffff813b19f6>] driver_register+0x76/0x140
[  819.359980]  [<ffffffff8159f18b>] ? printk+0x51/0x53
[  819.365524]  [<ffffffffa0347000>] ? 0xffffffffa0346fff
[  819.371291]  [<ffffffff812f5896>] __pci_register_driver+0x56/0xd0
[  819.378096]  [<ffffffffa0347054>] sbridge_init+0x54/0x1000 [sb_edac]
[  819.385231]  [<ffffffff8100203f>] do_one_initcall+0x3f/0x170
[  819.391577]  [<ffffffff810bcd2e>] sys_init_module+0xbe/0x230
[  819.397926]  [<ffffffff815bb529>] system_call_fastpath+0x16/0x1b
[  819.404633] ---[ end trace 1654fdd39556689f ]---

This happens because the bus is not being properly initialized.
Instead of putting the memory sub-devices inside the memory controller,
it is putting everything under the same directory:

$ tree /sys/bus/edac/
/sys/bus/edac/
├── devices
│   ├── all_channel_counts -> ../../../devices/system/edac/mc/mc0/all_channel_counts
│   ├── csrow0 -> ../../../devices/system/edac/mc/mc0/csrow0
│   ├── csrow1 -> ../../../devices/system/edac/mc/mc0/csrow1
│   ├── csrow2 -> ../../../devices/system/edac/mc/mc0/csrow2
│   ├── dimm0 -> ../../../devices/system/edac/mc/mc0/dimm0
│   ├── dimm1 -> ../../../devices/system/edac/mc/mc0/dimm1
│   ├── dimm3 -> ../../../devices/system/edac/mc/mc0/dimm3
│   ├── dimm6 -> ../../../devices/system/edac/mc/mc0/dimm6
│   ├── inject_addrmatch -> ../../../devices/system/edac/mc/mc0/inject_addrmatch
│   ├── mc -> ../../../devices/system/edac/mc
│   └── mc0 -> ../../../devices/system/edac/mc/mc0
├── drivers
├── drivers_autoprobe
├── drivers_probe
└── uevent

On a multi-memory controller system, the names "csrow%d" and "dimm%d"
should be under "mc%d", and not at the main hierarchy level.

So, we need to create a per-MC bus, in order to have its own namespace.

Reviewed-by: Aristeu Rozanski <arozansk@redhat.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Greg K H <gregkh@linuxfoundation.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 811f09a38f3a..61ae34643b49 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -218,7 +218,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	unsigned size, tot_dimms = 1, count = 1;
 	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
 	void *pvt, *p, *ptr = NULL;
-	int i, j, err, row, chn, n, len;
+	int i, j, row, chn, n, len;
 	bool per_rank = false;
 
 	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
@@ -374,15 +374,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	mci->op_state = OP_ALLOC;
 	INIT_LIST_HEAD(&mci->grp_kobj_list);
 
-	/*
-	 * Initialize the 'root' kobj for the edac_mc controller
-	 */
-	err = edac_mc_register_sysfs_main_kobj(mci);
-	if (err) {
-		kfree(mci);
-		return NULL;
-	}
-
 	/* at this point, the root kobj is valid, and in order to
 	 * 'free' the object, then the function:
 	 *      edac_mc_unregister_sysfs_main_kobj() must be called
@@ -403,7 +394,7 @@ void edac_mc_free(struct mem_ctl_info *mci)
 {
 	debugf1("%s()\n", __func__);
 
-	edac_mc_unregister_sysfs_main_kobj(mci);
+	edac_unregister_sysfs(mci);
 
 	/* free the mci instance memory here */
 	kfree(mci);
-- 
cgit v1.2.3


From d90c008963ef638cb7ab7d5eb76362b3c2d379bc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 21 Mar 2012 16:55:02 -0300
Subject: edac: Get rid of the old kobj's from the edac mc code

Now that al users for the old kobj raw access are gone,
we can get rid of the legacy kobj-based structures and
data.

Reviewed-by: Aristeu Rozanski <arozansk@redhat.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Michal Marek <mmarek@suse.cz>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 61ae34643b49..4a6fdc03740e 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -372,7 +372,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	}
 
 	mci->op_state = OP_ALLOC;
-	INIT_LIST_HEAD(&mci->grp_kobj_list);
 
 	/* at this point, the root kobj is valid, and in order to
 	 * 'free' the object, then the function:
-- 
cgit v1.2.3


From de3910eb79ac8c0f29a11224661c0ebaaf813039 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Tue, 24 Apr 2012 15:05:43 -0300
Subject: edac: change the mem allocation scheme to make
 Documentation/kobject.txt happy

Kernel kobjects have rigid rules: each container object should be
dynamically allocated, and can't be allocated into a single kmalloc.

EDAC never obeyed this rule: it has a single malloc function that
allocates all needed data into a single kzalloc.

As this is not accepted anymore, change the allocation schema of the
EDAC *_info structs to enforce this Kernel standard.

Acked-by: Chris Metcalf <cmetcalf@tilera.com>
Cc: Aristeu Rozanski <arozansk@redhat.com>
Cc: Doug Thompson <norsk5@yahoo.com>
Cc: Greg K H <gregkh@linuxfoundation.org>
Cc: Borislav Petkov <borislav.petkov@amd.com>
Cc: Mark Gross <mark.gross@intel.com>
Cc: Tim Small <tim@buttersideup.com>
Cc: Ranganathan Desikan <ravi@jetztechnologies.com>
Cc: "Arvind R." <arvino55@gmail.com>
Cc: Olof Johansson <olof@lixom.net>
Cc: Egor Martovetsky <egor@pasemi.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Hitoshi Mitake <h.mitake@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Shaohui Xie <Shaohui.Xie@freescale.com>
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 107 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 70 insertions(+), 37 deletions(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 4a6fdc03740e..db2ba31ba2b1 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -210,15 +210,15 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 {
 	struct mem_ctl_info *mci;
 	struct edac_mc_layer *layer;
-	struct csrow_info *csi, *csr;
-	struct rank_info *chi, *chp, *chan;
+	struct csrow_info *csr;
+	struct rank_info *chan;
 	struct dimm_info *dimm;
 	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
 	unsigned pos[EDAC_MAX_LAYERS];
 	unsigned size, tot_dimms = 1, count = 1;
 	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
 	void *pvt, *p, *ptr = NULL;
-	int i, j, row, chn, n, len;
+	int i, j, row, chn, n, len, off;
 	bool per_rank = false;
 
 	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
@@ -244,9 +244,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	 */
 	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
 	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
-	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
-	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
-	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
 	for (i = 0; i < n_layers; i++) {
 		count *= layers[i].size;
 		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
@@ -264,6 +261,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 		tot_dimms,
 		per_rank ? "ranks" : "dimms",
 		tot_csrows * tot_channels);
+
 	mci = kzalloc(size, GFP_KERNEL);
 	if (mci == NULL)
 		return NULL;
@@ -272,9 +270,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	 * rather than an imaginary chunk of memory located at address 0.
 	 */
 	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
-	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
-	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
-	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
 	for (i = 0; i < n_layers; i++) {
 		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
 		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
@@ -283,8 +278,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 
 	/* setup index and various internal pointers */
 	mci->mc_idx = mc_num;
-	mci->csrows = csi;
-	mci->dimms  = dimm;
 	mci->tot_dimms = tot_dimms;
 	mci->pvt_info = pvt;
 	mci->n_layers = n_layers;
@@ -295,39 +288,60 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	mci->mem_is_per_rank = per_rank;
 
 	/*
-	 * Fill the csrow struct
+	 * Alocate and fill the csrow/channels structs
 	 */
+	mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL);
+	if (!mci->csrows)
+		goto error;
 	for (row = 0; row < tot_csrows; row++) {
-		csr = &csi[row];
+		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
+		if (!csr)
+			goto error;
+		mci->csrows[row] = csr;
 		csr->csrow_idx = row;
 		csr->mci = mci;
 		csr->nr_channels = tot_channels;
-		chp = &chi[row * tot_channels];
-		csr->channels = chp;
+		csr->channels = kcalloc(sizeof(*csr->channels), tot_channels,
+					GFP_KERNEL);
+		if (!csr->channels)
+			goto error;
 
 		for (chn = 0; chn < tot_channels; chn++) {
-			chan = &chp[chn];
+			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
+			if (!chan)
+				goto error;
+			csr->channels[chn] = chan;
 			chan->chan_idx = chn;
 			chan->csrow = csr;
 		}
 	}
 
 	/*
-	 * Fill the dimm struct
+	 * Allocate and fill the dimm structs
 	 */
+	mci->dimms  = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL);
+	if (!mci->dimms)
+		goto error;
+
 	memset(&pos, 0, sizeof(pos));
 	row = 0;
 	chn = 0;
 	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
 		per_rank ? "ranks" : "dimms");
 	for (i = 0; i < tot_dimms; i++) {
-		chan = &csi[row].channels[chn];
-		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
-			       pos[0], pos[1], pos[2]);
+		chan = mci->csrows[row]->channels[chn];
+		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
+		if (off < 0 || off >= tot_dimms) {
+			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
+			goto error;
+		}
+
+		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
+		mci->dimms[off] = dimm;
 		dimm->mci = mci;
 
-		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
-			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
+		debugf2("%s: %d: %s%i (%d:%d:%d): row %d, chan %d\n", __func__,
+			i, per_rank ? "rank" : "dimm", off,
 			pos[0], pos[1], pos[2], row, chn);
 
 		/*
@@ -381,6 +395,28 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	 */
 
 	return mci;
+
+error:
+	if (mci->dimms) {
+		for (i = 0; i < tot_dimms; i++)
+			kfree(mci->dimms[i]);
+		kfree(mci->dimms);
+	}
+	if (mci->csrows) {
+		for (chn = 0; chn < tot_channels; chn++) {
+			csr = mci->csrows[chn];
+			if (csr) {
+				for (chn = 0; chn < tot_channels; chn++)
+					kfree(csr->channels[chn]);
+				kfree(csr);
+			}
+			kfree(mci->csrows[i]);
+		}
+		kfree(mci->csrows);
+	}
+	kfree(mci);
+
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(edac_mc_alloc);
 
@@ -393,10 +429,8 @@ void edac_mc_free(struct mem_ctl_info *mci)
 {
 	debugf1("%s()\n", __func__);
 
+	/* the mci instance is freed here, when the sysfs object is dropped */
 	edac_unregister_sysfs(mci);
-
-	/* free the mci instance memory here */
-	kfree(mci);
 }
 EXPORT_SYMBOL_GPL(edac_mc_free);
 
@@ -668,13 +702,12 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 		for (i = 0; i < mci->nr_csrows; i++) {
 			int j;
 
-			edac_mc_dump_csrow(&mci->csrows[i]);
-			for (j = 0; j < mci->csrows[i].nr_channels; j++)
-				edac_mc_dump_channel(&mci->csrows[i].
-						channels[j]);
+			edac_mc_dump_csrow(mci->csrows[i]);
+			for (j = 0; j < mci->csrows[i]->nr_channels; j++)
+				edac_mc_dump_channel(mci->csrows[i]->channels[j]);
 		}
 		for (i = 0; i < mci->tot_dimms; i++)
-			edac_mc_dump_dimm(&mci->dimms[i]);
+			edac_mc_dump_dimm(mci->dimms[i]);
 	}
 #endif
 	mutex_lock(&mem_ctls_mutex);
@@ -793,17 +826,17 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 /* FIXME - should return -1 */
 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 {
-	struct csrow_info *csrows = mci->csrows;
+	struct csrow_info **csrows = mci->csrows;
 	int row, i, j, n;
 
 	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
 	row = -1;
 
 	for (i = 0; i < mci->nr_csrows; i++) {
-		struct csrow_info *csrow = &csrows[i];
+		struct csrow_info *csrow = csrows[i];
 		n = 0;
 		for (j = 0; j < csrow->nr_channels; j++) {
-			struct dimm_info *dimm = csrow->channels[j].dimm;
+			struct dimm_info *dimm = csrow->channels[j]->dimm;
 			n += dimm->nr_pages;
 		}
 		if (n == 0)
@@ -1062,7 +1095,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	p = label;
 	*p = '\0';
 	for (i = 0; i < mci->tot_dimms; i++) {
-		struct dimm_info *dimm = &mci->dimms[i];
+		struct dimm_info *dimm = mci->dimms[i];
 
 		if (top_layer >= 0 && top_layer != dimm->location[0])
 			continue;
@@ -1120,13 +1153,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			strcpy(label, "unknown memory");
 		if (type == HW_EVENT_ERR_CORRECTED) {
 			if (row >= 0) {
-				mci->csrows[row].ce_count++;
+				mci->csrows[row]->ce_count++;
 				if (chan >= 0)
-					mci->csrows[row].channels[chan].ce_count++;
+					mci->csrows[row]->channels[chan]->ce_count++;
 			}
 		} else
 			if (row >= 0)
-				mci->csrows[row].ue_count++;
+				mci->csrows[row]->ue_count++;
 	}
 
 	/* Fill the RAM location data */
-- 
cgit v1.2.3


From dd23cd6eb1f59ba722a6e6aa228adff7c01404de Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sun, 29 Apr 2012 11:59:14 -0300
Subject: edac: Don't add __func__ or __FILE__ for debugf[0-9] msgs

The debug macro already adds that. Most of the work here was
made by this small script:

$f .=$_ while (<>);

$f =~ s/(debugf[0-9]\s*\(\s*)__FILE__\s*": /\1"/g;
$f =~ s/(debugf[0-9]\s*\(\s*)__FILE__\s*/\1/g;
$f =~ s/(debugf[0-9]\s*\(\s*)__FILE__\s*"MC: /\1"/g;

$f =~ s/(debugf[0-9]\s*\(\")\%s[\:\,\(\)]*\s*([^\"]*\s*[^\)]+)__func__\s*\,\s*/\1\2/g;
$f =~ s/(debugf[0-9]\s*\(\")\%s[\:\,\(\)]*\s*([^\"]*\s*[^\)]+),\s*__func__\s*\)/\1\2)/g;
$f =~ s/(debugf[0-9]\s*\(\"MC\:\s*)\%s[\:\,\(\)]*\s*([^\"]*\s*[^\)]+)__func__\s*\,\s*/\1\2/g;
$f =~ s/(debugf[0-9]\s*\(\"MC\:\s*)\%s[\:\,\(\)]*\s*([^\"]*\s*[^\)]+),\s*__func__\s*\)/\1\2)/g;

$f =~ s/\"MC\: \\n\"/"MC:\\n"/g;

print $f;

After running the script, manual cleanups were done to fix it the remaining
places.

While here, removed the __LINE__ on most places, as it doesn't actually give
useful info on most places.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 44 +++++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index db2ba31ba2b1..4df9c4ac63c3 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -246,18 +246,18 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
 	for (i = 0; i < n_layers; i++) {
 		count *= layers[i].size;
-		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
+		debugf4("errcount layer %d size %d\n", i, count);
 		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 		tot_errcount += 2 * count;
 	}
 
-	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
+	debugf4("allocating %d error counters\n", tot_errcount);
 	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
 	size = ((unsigned long)pvt) + sz_pvt;
 
-	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
-		__func__, size,
+	debugf1("allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
+		size,
 		tot_dimms,
 		per_rank ? "ranks" : "dimms",
 		tot_csrows * tot_channels);
@@ -326,7 +326,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	memset(&pos, 0, sizeof(pos));
 	row = 0;
 	chn = 0;
-	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
+	debugf4("initializing %d %s\n", tot_dimms,
 		per_rank ? "ranks" : "dimms");
 	for (i = 0; i < tot_dimms; i++) {
 		chan = mci->csrows[row]->channels[chn];
@@ -340,8 +340,8 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 		mci->dimms[off] = dimm;
 		dimm->mci = mci;
 
-		debugf2("%s: %d: %s%i (%d:%d:%d): row %d, chan %d\n", __func__,
-			i, per_rank ? "rank" : "dimm", off,
+		debugf2("%d: %s%i (%d:%d:%d): row %d, chan %d\n", i,
+			per_rank ? "rank" : "dimm", off,
 			pos[0], pos[1], pos[2], row, chn);
 
 		/*
@@ -427,7 +427,7 @@ EXPORT_SYMBOL_GPL(edac_mc_alloc);
  */
 void edac_mc_free(struct mem_ctl_info *mci)
 {
-	debugf1("%s()\n", __func__);
+	debugf1("\n");
 
 	/* the mci instance is freed here, when the sysfs object is dropped */
 	edac_unregister_sysfs(mci);
@@ -447,7 +447,7 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 	struct mem_ctl_info *mci;
 	struct list_head *item;
 
-	debugf3("%s()\n", __func__);
+	debugf3("\n");
 
 	list_for_each(item, &mc_devices) {
 		mci = list_entry(item, struct mem_ctl_info, link);
@@ -515,7 +515,7 @@ static void edac_mc_workq_function(struct work_struct *work_req)
  */
 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 {
-	debugf0("%s()\n", __func__);
+	debugf0("\n");
 
 	/* if this instance is not in the POLL state, then simply return */
 	if (mci->op_state != OP_RUNNING_POLL)
@@ -542,8 +542,7 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 
 	status = cancel_delayed_work(&mci->work);
 	if (status == 0) {
-		debugf0("%s() not canceled, flush the queue\n",
-			__func__);
+		debugf0("not canceled, flush the queue\n");
 
 		/* workq instance might be running, wait for it */
 		flush_workqueue(edac_workqueue);
@@ -690,7 +689,7 @@ EXPORT_SYMBOL(edac_mc_find);
 /* FIXME - should a warning be printed if no error detection? correction? */
 int edac_mc_add_mc(struct mem_ctl_info *mci)
 {
-	debugf0("%s()\n", __func__);
+	debugf0("\n");
 
 #ifdef CONFIG_EDAC_DEBUG
 	if (edac_debug_level >= 3)
@@ -761,7 +760,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 {
 	struct mem_ctl_info *mci;
 
-	debugf0("%s()\n", __func__);
+	debugf0("\n");
 
 	mutex_lock(&mem_ctls_mutex);
 
@@ -799,7 +798,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 	void *virt_addr;
 	unsigned long flags = 0;
 
-	debugf3("%s()\n", __func__);
+	debugf3("\n");
 
 	/* ECC error page was not in our memory. Ignore it. */
 	if (!pfn_valid(page))
@@ -829,7 +828,7 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 	struct csrow_info **csrows = mci->csrows;
 	int row, i, j, n;
 
-	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
+	debugf1("MC%d: 0x%lx\n", mci->mc_idx, page);
 	row = -1;
 
 	for (i = 0; i < mci->nr_csrows; i++) {
@@ -842,8 +841,8 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 		if (n == 0)
 			continue;
 
-		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
-			"mask(0x%lx)\n", mci->mc_idx, __func__,
+		debugf3("MC%d: first(0x%lx) page(0x%lx) last(0x%lx) "
+			"mask(0x%lx)\n", mci->mc_idx,
 			csrow->first_page, page, csrow->last_page,
 			csrow->page_mask);
 
@@ -1049,7 +1048,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	u16 error_count;	/* FIXME: make it a parameter */
 	u8 grain_bits;
 
-	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
+	debugf3("MC%d\n", mci->mc_idx);
 
 	/*
 	 * Check if the event report is consistent and if the memory
@@ -1127,8 +1126,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			 * get csrow/channel of the DIMM, in order to allow
 			 * incrementing the compat API counters
 			 */
-			debugf4("%s: %s csrows map: (%d,%d)\n",
-				__func__,
+			debugf4("%s csrows map: (%d,%d)\n",
 				mci->mem_is_per_rank ? "rank" : "dimm",
 				dimm->csrow, dimm->cschannel);
 
@@ -1147,8 +1145,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	if (!enable_per_layer_report) {
 		strcpy(label, "any memory");
 	} else {
-		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
-			__func__, row, chan);
+		debugf4("csrow/channel to increment: (%d,%d)\n",
+			row, chan);
 		if (p == label)
 			strcpy(label, "unknown memory");
 		if (type == HW_EVENT_ERR_CORRECTED) {
-- 
cgit v1.2.3


From 956b9ba156dbfdb9cede2b2927ddf8be2233b3a7 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 29 Apr 2012 17:08:39 -0300
Subject: edac: Convert debugfX to edac_dbg(X,

Use a more common debugging style.

Remove __FILE__ uses, add missing newlines,
coalesce formats and align arguments.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 121 ++++++++++++++++++++++++-------------------------
 1 file changed, 60 insertions(+), 61 deletions(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 4df9c4ac63c3..a39fe6f966e3 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -46,56 +46,57 @@ static LIST_HEAD(mc_devices);
 
 static void edac_mc_dump_channel(struct rank_info *chan)
 {
-	debugf4("\tchannel = %p\n", chan);
-	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
-	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
-	debugf4("\tchannel->dimm = %p\n", chan->dimm);
+	edac_dbg(4, "\tchannel = %p\n", chan);
+	edac_dbg(4, "\tchannel->chan_idx = %d\n", chan->chan_idx);
+	edac_dbg(4, "\tchannel->csrow = %p\n", chan->csrow);
+	edac_dbg(4, "\tchannel->dimm = %p\n", chan->dimm);
 }
 
 static void edac_mc_dump_dimm(struct dimm_info *dimm)
 {
 	int i;
 
-	debugf4("\tdimm = %p\n", dimm);
-	debugf4("\tdimm->label = '%s'\n", dimm->label);
-	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
-	debugf4("\tdimm location ");
+	edac_dbg(4, "\tdimm = %p\n", dimm);
+	edac_dbg(4, "\tdimm->label = '%s'\n", dimm->label);
+	edac_dbg(4, "\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
+	edac_dbg(4, "\tdimm location ");
 	for (i = 0; i < dimm->mci->n_layers; i++) {
 		printk(KERN_CONT "%d", dimm->location[i]);
 		if (i < dimm->mci->n_layers - 1)
 			printk(KERN_CONT ".");
 	}
 	printk(KERN_CONT "\n");
-	debugf4("\tdimm->grain = %d\n", dimm->grain);
-	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
+	edac_dbg(4, "\tdimm->grain = %d\n", dimm->grain);
+	edac_dbg(4, "\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
 }
 
 static void edac_mc_dump_csrow(struct csrow_info *csrow)
 {
-	debugf4("\tcsrow = %p\n", csrow);
-	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
-	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
-	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
-	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
-	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
-	debugf4("\tcsrow->channels = %p\n", csrow->channels);
-	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
+	edac_dbg(4, "\tcsrow = %p\n", csrow);
+	edac_dbg(4, "\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
+	edac_dbg(4, "\tcsrow->first_page = 0x%lx\n", csrow->first_page);
+	edac_dbg(4, "\tcsrow->last_page = 0x%lx\n", csrow->last_page);
+	edac_dbg(4, "\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
+	edac_dbg(4, "\tcsrow->nr_channels = %d\n", csrow->nr_channels);
+	edac_dbg(4, "\tcsrow->channels = %p\n", csrow->channels);
+	edac_dbg(4, "\tcsrow->mci = %p\n", csrow->mci);
 }
 
 static void edac_mc_dump_mci(struct mem_ctl_info *mci)
 {
-	debugf3("\tmci = %p\n", mci);
-	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
-	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
-	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
-	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
-	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
-		mci->nr_csrows, mci->csrows);
-	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
-		mci->tot_dimms, mci->dimms);
-	debugf3("\tdev = %p\n", mci->pdev);
-	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
-	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
+	edac_dbg(3, "\tmci = %p\n", mci);
+	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
+	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
+	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
+	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
+	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
+		 mci->nr_csrows, mci->csrows);
+	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
+		 mci->tot_dimms, mci->dimms);
+	edac_dbg(3, "\tdev = %p\n", mci->pdev);
+	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
+		 mci->mod_name, mci->ctl_name);
+	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
 }
 
 #endif				/* CONFIG_EDAC_DEBUG */
@@ -246,21 +247,21 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
 	for (i = 0; i < n_layers; i++) {
 		count *= layers[i].size;
-		debugf4("errcount layer %d size %d\n", i, count);
+		edac_dbg(4, "errcount layer %d size %d\n", i, count);
 		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
 		tot_errcount += 2 * count;
 	}
 
-	debugf4("allocating %d error counters\n", tot_errcount);
+	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
 	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
 	size = ((unsigned long)pvt) + sz_pvt;
 
-	debugf1("allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
-		size,
-		tot_dimms,
-		per_rank ? "ranks" : "dimms",
-		tot_csrows * tot_channels);
+	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
+		 size,
+		 tot_dimms,
+		 per_rank ? "ranks" : "dimms",
+		 tot_csrows * tot_channels);
 
 	mci = kzalloc(size, GFP_KERNEL);
 	if (mci == NULL)
@@ -326,8 +327,8 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	memset(&pos, 0, sizeof(pos));
 	row = 0;
 	chn = 0;
-	debugf4("initializing %d %s\n", tot_dimms,
-		per_rank ? "ranks" : "dimms");
+	edac_dbg(4, "initializing %d %s\n",
+		 tot_dimms, per_rank ? "ranks" : "dimms");
 	for (i = 0; i < tot_dimms; i++) {
 		chan = mci->csrows[row]->channels[chn];
 		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
@@ -340,9 +341,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 		mci->dimms[off] = dimm;
 		dimm->mci = mci;
 
-		debugf2("%d: %s%i (%d:%d:%d): row %d, chan %d\n", i,
-			per_rank ? "rank" : "dimm", off,
-			pos[0], pos[1], pos[2], row, chn);
+		edac_dbg(2, "%d: %s%i (%d:%d:%d): row %d, chan %d\n",
+			 i, per_rank ? "rank" : "dimm", off,
+			 pos[0], pos[1], pos[2], row, chn);
 
 		/*
 		 * Copy DIMM location and initialize it.
@@ -427,7 +428,7 @@ EXPORT_SYMBOL_GPL(edac_mc_alloc);
  */
 void edac_mc_free(struct mem_ctl_info *mci)
 {
-	debugf1("\n");
+	edac_dbg(1, "\n");
 
 	/* the mci instance is freed here, when the sysfs object is dropped */
 	edac_unregister_sysfs(mci);
@@ -447,7 +448,7 @@ struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 	struct mem_ctl_info *mci;
 	struct list_head *item;
 
-	debugf3("\n");
+	edac_dbg(3, "\n");
 
 	list_for_each(item, &mc_devices) {
 		mci = list_entry(item, struct mem_ctl_info, link);
@@ -515,7 +516,7 @@ static void edac_mc_workq_function(struct work_struct *work_req)
  */
 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 {
-	debugf0("\n");
+	edac_dbg(0, "\n");
 
 	/* if this instance is not in the POLL state, then simply return */
 	if (mci->op_state != OP_RUNNING_POLL)
@@ -542,7 +543,7 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 
 	status = cancel_delayed_work(&mci->work);
 	if (status == 0) {
-		debugf0("not canceled, flush the queue\n");
+		edac_dbg(0, "not canceled, flush the queue\n");
 
 		/* workq instance might be running, wait for it */
 		flush_workqueue(edac_workqueue);
@@ -689,7 +690,7 @@ EXPORT_SYMBOL(edac_mc_find);
 /* FIXME - should a warning be printed if no error detection? correction? */
 int edac_mc_add_mc(struct mem_ctl_info *mci)
 {
-	debugf0("\n");
+	edac_dbg(0, "\n");
 
 #ifdef CONFIG_EDAC_DEBUG
 	if (edac_debug_level >= 3)
@@ -760,7 +761,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 {
 	struct mem_ctl_info *mci;
 
-	debugf0("\n");
+	edac_dbg(0, "\n");
 
 	mutex_lock(&mem_ctls_mutex);
 
@@ -798,7 +799,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 	void *virt_addr;
 	unsigned long flags = 0;
 
-	debugf3("\n");
+	edac_dbg(3, "\n");
 
 	/* ECC error page was not in our memory. Ignore it. */
 	if (!pfn_valid(page))
@@ -828,7 +829,7 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 	struct csrow_info **csrows = mci->csrows;
 	int row, i, j, n;
 
-	debugf1("MC%d: 0x%lx\n", mci->mc_idx, page);
+	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
 	row = -1;
 
 	for (i = 0; i < mci->nr_csrows; i++) {
@@ -841,10 +842,10 @@ int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 		if (n == 0)
 			continue;
 
-		debugf3("MC%d: first(0x%lx) page(0x%lx) last(0x%lx) "
-			"mask(0x%lx)\n", mci->mc_idx,
-			csrow->first_page, page, csrow->last_page,
-			csrow->page_mask);
+		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
+			 mci->mc_idx,
+			 csrow->first_page, page, csrow->last_page,
+			 csrow->page_mask);
 
 		if ((page >= csrow->first_page) &&
 		    (page <= csrow->last_page) &&
@@ -1048,7 +1049,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	u16 error_count;	/* FIXME: make it a parameter */
 	u8 grain_bits;
 
-	debugf3("MC%d\n", mci->mc_idx);
+	edac_dbg(3, "MC%d\n", mci->mc_idx);
 
 	/*
 	 * Check if the event report is consistent and if the memory
@@ -1126,10 +1127,9 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			 * get csrow/channel of the DIMM, in order to allow
 			 * incrementing the compat API counters
 			 */
-			debugf4("%s csrows map: (%d,%d)\n",
-				mci->mem_is_per_rank ? "rank" : "dimm",
-				dimm->csrow, dimm->cschannel);
-
+			edac_dbg(4, "%s csrows map: (%d,%d)\n",
+				 mci->mem_is_per_rank ? "rank" : "dimm",
+				 dimm->csrow, dimm->cschannel);
 			if (row == -1)
 				row = dimm->csrow;
 			else if (row >= 0 && row != dimm->csrow)
@@ -1145,8 +1145,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	if (!enable_per_layer_report) {
 		strcpy(label, "any memory");
 	} else {
-		debugf4("csrow/channel to increment: (%d,%d)\n",
-			row, chan);
+		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
 		if (p == label)
 			strcpy(label, "unknown memory");
 		if (type == HW_EVENT_ERR_CORRECTED) {
-- 
cgit v1.2.3


From 6e84d359b2bea5ce659b3c3e5d3003fb11bd91d5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 30 Apr 2012 10:24:43 -0300
Subject: edac_mc: Cleanup per-dimm_info debug messages

The edac_mc_alloc() routine allocates one dimm_info device for all
possible memories, including the non-filled ones. The debug messages
there are somewhat confusing. So, cleans them, by moving the code
that prints the memory location to edac_mc, and using it on both
edac_mc_sysfs and edac_mc.

Also, only dumps information when DIMM/ranks are actually
filled.

After this patch, a dimm-based memory controller will print the debug
info as:

[ 1011.380027] EDAC DEBUG: edac_mc_dump_csrow: csrow->csrow_idx = 0
[ 1011.380029] EDAC DEBUG: edac_mc_dump_csrow:   csrow = ffff8801169be000
[ 1011.380031] EDAC DEBUG: edac_mc_dump_csrow:   csrow->first_page = 0x0
[ 1011.380032] EDAC DEBUG: edac_mc_dump_csrow:   csrow->last_page = 0x0
[ 1011.380034] EDAC DEBUG: edac_mc_dump_csrow:   csrow->page_mask = 0x0
[ 1011.380035] EDAC DEBUG: edac_mc_dump_csrow:   csrow->nr_channels = 3
[ 1011.380037] EDAC DEBUG: edac_mc_dump_csrow:   csrow->channels = ffff8801149c2840
[ 1011.380039] EDAC DEBUG: edac_mc_dump_csrow:   csrow->mci = ffff880117426000
[ 1011.380041] EDAC DEBUG: edac_mc_dump_channel:   channel->chan_idx = 0
[ 1011.380042] EDAC DEBUG: edac_mc_dump_channel:     channel = ffff8801149c2860
[ 1011.380044] EDAC DEBUG: edac_mc_dump_channel:     channel->csrow = ffff8801169be000
[ 1011.380046] EDAC DEBUG: edac_mc_dump_channel:     channel->dimm = ffff88010fe90400
...
[ 1011.380095] EDAC DEBUG: edac_mc_dump_dimm: dimm0: channel 0 slot 0 mapped as virtual row 0, chan 0
[ 1011.380097] EDAC DEBUG: edac_mc_dump_dimm:   dimm = ffff88010fe90400
[ 1011.380099] EDAC DEBUG: edac_mc_dump_dimm:   dimm->label = 'CPU#0Channel#0_DIMM#0'
[ 1011.380101] EDAC DEBUG: edac_mc_dump_dimm:   dimm->nr_pages = 0x40000
[ 1011.380103] EDAC DEBUG: edac_mc_dump_dimm:   dimm->grain = 8
[ 1011.380104] EDAC DEBUG: edac_mc_dump_dimm:   dimm->nr_pages = 0x40000
...

(a rank-based memory controller would print, instead of "dimm?", "rank?"
 on the above debug info)

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 95 ++++++++++++++++++++++++++++++--------------------
 1 file changed, 58 insertions(+), 37 deletions(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index a39fe6f966e3..98c759dc0d5d 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -42,44 +42,63 @@
 static DEFINE_MUTEX(mem_ctls_mutex);
 static LIST_HEAD(mc_devices);
 
+unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
+			         unsigned len)
+{
+	struct mem_ctl_info *mci = dimm->mci;
+	int i, n, count = 0;
+	char *p = buf;
+
+	for (i = 0; i < mci->n_layers; i++) {
+		n = snprintf(p, len, "%s %d ",
+			      edac_layer_name[mci->layers[i].type],
+			      dimm->location[i]);
+		p += n;
+		len -= n;
+		count += n;
+		if (!len)
+			break;
+	}
+
+	return count;
+}
+
 #ifdef CONFIG_EDAC_DEBUG
 
 static void edac_mc_dump_channel(struct rank_info *chan)
 {
-	edac_dbg(4, "\tchannel = %p\n", chan);
-	edac_dbg(4, "\tchannel->chan_idx = %d\n", chan->chan_idx);
-	edac_dbg(4, "\tchannel->csrow = %p\n", chan->csrow);
-	edac_dbg(4, "\tchannel->dimm = %p\n", chan->dimm);
+	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
+	edac_dbg(4, "    channel = %p\n", chan);
+	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
+	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
 }
 
-static void edac_mc_dump_dimm(struct dimm_info *dimm)
+static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
 {
-	int i;
-
-	edac_dbg(4, "\tdimm = %p\n", dimm);
-	edac_dbg(4, "\tdimm->label = '%s'\n", dimm->label);
-	edac_dbg(4, "\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
-	edac_dbg(4, "\tdimm location ");
-	for (i = 0; i < dimm->mci->n_layers; i++) {
-		printk(KERN_CONT "%d", dimm->location[i]);
-		if (i < dimm->mci->n_layers - 1)
-			printk(KERN_CONT ".");
-	}
-	printk(KERN_CONT "\n");
-	edac_dbg(4, "\tdimm->grain = %d\n", dimm->grain);
-	edac_dbg(4, "\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
+	char location[80];
+
+	edac_dimm_info_location(dimm, location, sizeof(location));
+
+	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
+		 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
+		 number, location, dimm->csrow, dimm->cschannel);
+	edac_dbg(4, "  dimm = %p\n", dimm);
+	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
+	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
+	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
+	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
 }
 
 static void edac_mc_dump_csrow(struct csrow_info *csrow)
 {
-	edac_dbg(4, "\tcsrow = %p\n", csrow);
-	edac_dbg(4, "\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
-	edac_dbg(4, "\tcsrow->first_page = 0x%lx\n", csrow->first_page);
-	edac_dbg(4, "\tcsrow->last_page = 0x%lx\n", csrow->last_page);
-	edac_dbg(4, "\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
-	edac_dbg(4, "\tcsrow->nr_channels = %d\n", csrow->nr_channels);
-	edac_dbg(4, "\tcsrow->channels = %p\n", csrow->channels);
-	edac_dbg(4, "\tcsrow->mci = %p\n", csrow->mci);
+	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
+	edac_dbg(4, "  csrow = %p\n", csrow);
+	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
+	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
+	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
+	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
+	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
+	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
 }
 
 static void edac_mc_dump_mci(struct mem_ctl_info *mci)
@@ -327,8 +346,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	memset(&pos, 0, sizeof(pos));
 	row = 0;
 	chn = 0;
-	edac_dbg(4, "initializing %d %s\n",
-		 tot_dimms, per_rank ? "ranks" : "dimms");
 	for (i = 0; i < tot_dimms; i++) {
 		chan = mci->csrows[row]->channels[chn];
 		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
@@ -341,10 +358,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 		mci->dimms[off] = dimm;
 		dimm->mci = mci;
 
-		edac_dbg(2, "%d: %s%i (%d:%d:%d): row %d, chan %d\n",
-			 i, per_rank ? "rank" : "dimm", off,
-			 pos[0], pos[1], pos[2], row, chn);
-
 		/*
 		 * Copy DIMM location and initialize it.
 		 */
@@ -700,14 +713,22 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 		int i;
 
 		for (i = 0; i < mci->nr_csrows; i++) {
+			struct csrow_info *csrow = mci->csrows[i];
+			u32 nr_pages = 0;
 			int j;
 
-			edac_mc_dump_csrow(mci->csrows[i]);
-			for (j = 0; j < mci->csrows[i]->nr_channels; j++)
-				edac_mc_dump_channel(mci->csrows[i]->channels[j]);
+			for (j = 0; j < csrow->nr_channels; j++)
+				nr_pages += csrow->channels[j]->dimm->nr_pages;
+			if (!nr_pages)
+				continue;
+			edac_mc_dump_csrow(csrow);
+			for (j = 0; j < csrow->nr_channels; j++)
+				if (csrow->channels[j]->dimm->nr_pages)
+					edac_mc_dump_channel(csrow->channels[j]);
 		}
 		for (i = 0; i < mci->tot_dimms; i++)
-			edac_mc_dump_dimm(mci->dimms[i]);
+			if (mci->dimms[i]->nr_pages)
+				edac_mc_dump_dimm(mci->dimms[i], i);
 	}
 #endif
 	mutex_lock(&mem_ctls_mutex);
-- 
cgit v1.2.3


From 08a4a136909602eae0e71e147153461df077a46f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 18 May 2012 15:51:02 +0300
Subject: edac_mc: check for allocation failure in edac_mc_alloc()

Add a check here for if kzalloc() failed.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 98c759dc0d5d..91ca4101c789 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -355,6 +355,8 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 		}
 
 		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
+		if (!dimm)
+			goto error;
 		mci->dimms[off] = dimm;
 		dimm->mci = mci;
 
-- 
cgit v1.2.3


From 03f7eae80f4b913929be84e0c883ee98196fd6ff Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 4 Jun 2012 11:29:25 -0300
Subject: edac: remove arch-specific parameter for the error handler

Remove the arch-dependent parameter, as it were not used,
as the MCE tracepoint weren't implemented. It probably doesn't
make sense to have an MCE-specific tracepoint, as this will
cost more bytes at the tracepoint, and tracepoint is not free.

The changes at the EDAC drivers were done by this small perl script:

	$file .=$_ while (<>);
	$file =~ s/(edac_mc_handle_error)\s*\(([^\;]+)\,([^\,\)]+)\s*\)/$1($2)/g;
	print $file;

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 91ca4101c789..fb41e38c6891 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -1044,9 +1044,6 @@ static void edac_ue_error(struct mem_ctl_info *mci,
  * @other_detail:	Technical details about the event that
  *			may help hardware manufacturers and
  *			EDAC developers to analyse the event
- * @arch_log:		Architecture-specific struct that can
- *			be used to add extended information to the
- *			tracepoint, like dumping MCE registers.
  */
 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  struct mem_ctl_info *mci,
@@ -1057,8 +1054,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  const int mid_layer,
 			  const int low_layer,
 			  const char *msg,
-			  const char *other_detail,
-			  const void *arch_log)
+			  const char *other_detail)
 {
 	/* FIXME: too much for stack: move it to some pre-alocated area */
 	char detail[80], location[80];
-- 
cgit v1.2.3


From 9eb07a7fb8a90ee39fa9d5489afc0330cfcfbea7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 4 Jun 2012 13:27:43 -0300
Subject: edac: edac_mc_handle_error(): add an error_count parameter

In order to avoid loosing error events, it is desirable to group
error events together and generate a single trace for several identical
errors.

The trace API already allows reporting multiple errors. Change the
handle_error function to also allow that.

The changes at the drivers were made by this small script:

	$file .=$_ while (<>);
	$file =~ s/(edac_mc_handle_error)\s*\(([^\,]+)\,([^\,]+)\,/$1($2,$3, 1,/g;
	print $file;

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 56 ++++++++++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 24 deletions(-)

(limited to 'drivers/edac/edac_mc.c')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index fb41e38c6891..8d59378f839c 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -897,15 +897,16 @@ const char *edac_layer_name[] = {
 EXPORT_SYMBOL_GPL(edac_layer_name);
 
 static void edac_inc_ce_error(struct mem_ctl_info *mci,
-				    bool enable_per_layer_report,
-				    const int pos[EDAC_MAX_LAYERS])
+			      bool enable_per_layer_report,
+			      const int pos[EDAC_MAX_LAYERS],
+			      const u16 count)
 {
 	int i, index = 0;
 
-	mci->ce_mc++;
+	mci->ce_mc += count;
 
 	if (!enable_per_layer_report) {
-		mci->ce_noinfo_count++;
+		mci->ce_noinfo_count += count;
 		return;
 	}
 
@@ -913,7 +914,7 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci,
 		if (pos[i] < 0)
 			break;
 		index += pos[i];
-		mci->ce_per_layer[i][index]++;
+		mci->ce_per_layer[i][index] += count;
 
 		if (i < mci->n_layers - 1)
 			index *= mci->layers[i + 1].size;
@@ -922,14 +923,15 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci,
 
 static void edac_inc_ue_error(struct mem_ctl_info *mci,
 				    bool enable_per_layer_report,
-				    const int pos[EDAC_MAX_LAYERS])
+				    const int pos[EDAC_MAX_LAYERS],
+				    const u16 count)
 {
 	int i, index = 0;
 
-	mci->ue_mc++;
+	mci->ue_mc += count;
 
 	if (!enable_per_layer_report) {
-		mci->ce_noinfo_count++;
+		mci->ce_noinfo_count += count;
 		return;
 	}
 
@@ -937,7 +939,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci,
 		if (pos[i] < 0)
 			break;
 		index += pos[i];
-		mci->ue_per_layer[i][index]++;
+		mci->ue_per_layer[i][index] += count;
 
 		if (i < mci->n_layers - 1)
 			index *= mci->layers[i + 1].size;
@@ -945,6 +947,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci,
 }
 
 static void edac_ce_error(struct mem_ctl_info *mci,
+			  const u16 error_count,
 			  const int pos[EDAC_MAX_LAYERS],
 			  const char *msg,
 			  const char *location,
@@ -961,16 +964,18 @@ static void edac_ce_error(struct mem_ctl_info *mci,
 	if (edac_mc_get_log_ce()) {
 		if (other_detail && *other_detail)
 			edac_mc_printk(mci, KERN_WARNING,
-				       "CE %s on %s (%s %s - %s)\n",
+				       "%d CE %s on %s (%s %s - %s)\n",
+				       error_count,
 				       msg, label, location,
 				       detail, other_detail);
 		else
 			edac_mc_printk(mci, KERN_WARNING,
-				       "CE %s on %s (%s %s)\n",
+				       "%d CE %s on %s (%s %s)\n",
+				       error_count,
 				       msg, label, location,
 				       detail);
 	}
-	edac_inc_ce_error(mci, enable_per_layer_report, pos);
+	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
 
 	if (mci->scrub_mode & SCRUB_SW_SRC) {
 		/*
@@ -994,6 +999,7 @@ static void edac_ce_error(struct mem_ctl_info *mci,
 }
 
 static void edac_ue_error(struct mem_ctl_info *mci,
+			  const u16 error_count,
 			  const int pos[EDAC_MAX_LAYERS],
 			  const char *msg,
 			  const char *location,
@@ -1005,12 +1011,14 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 	if (edac_mc_get_log_ue()) {
 		if (other_detail && *other_detail)
 			edac_mc_printk(mci, KERN_WARNING,
-				       "UE %s on %s (%s %s - %s)\n",
+				       "%d UE %s on %s (%s %s - %s)\n",
+				       error_count,
 			               msg, label, location, detail,
 				       other_detail);
 		else
 			edac_mc_printk(mci, KERN_WARNING,
-				       "UE %s on %s (%s %s)\n",
+				       "%d UE %s on %s (%s %s)\n",
+				       error_count,
 			               msg, label, location, detail);
 	}
 
@@ -1023,7 +1031,7 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 			      msg, label, location, detail);
 	}
 
-	edac_inc_ue_error(mci, enable_per_layer_report, pos);
+	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
 }
 
 #define OTHER_LABEL " or "
@@ -1033,6 +1041,7 @@ static void edac_ue_error(struct mem_ctl_info *mci,
  *
  * @type:		severity of the error (CE/UE/Fatal)
  * @mci:		a struct mem_ctl_info pointer
+ * @error_count:	Number of errors of the same type
  * @page_frame_number:	mem page where the error occurred
  * @offset_in_page:	offset of the error inside the page
  * @syndrome:		ECC syndrome
@@ -1047,6 +1056,7 @@ static void edac_ue_error(struct mem_ctl_info *mci,
  */
 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  struct mem_ctl_info *mci,
+			  const u16 error_count,
 			  const unsigned long page_frame_number,
 			  const unsigned long offset_in_page,
 			  const unsigned long syndrome,
@@ -1065,7 +1075,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	int i;
 	long grain;
 	bool enable_per_layer_report = false;
-	u16 error_count;	/* FIXME: make it a parameter */
 	u8 grain_bits;
 
 	edac_dbg(3, "MC%d\n", mci->mc_idx);
@@ -1169,13 +1178,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			strcpy(label, "unknown memory");
 		if (type == HW_EVENT_ERR_CORRECTED) {
 			if (row >= 0) {
-				mci->csrows[row]->ce_count++;
+				mci->csrows[row]->ce_count += error_count;
 				if (chan >= 0)
-					mci->csrows[row]->channels[chan]->ce_count++;
+					mci->csrows[row]->channels[chan]->ce_count += error_count;
 			}
 		} else
 			if (row >= 0)
-				mci->csrows[row]->ue_count++;
+				mci->csrows[row]->ue_count += error_count;
 	}
 
 	/* Fill the RAM location data */
@@ -1193,7 +1202,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 
 	/* Report the error via the trace interface */
 
-	error_count = 1;	/* FIXME: allow change it */
 	grain_bits = fls_long(grain) + 1;
 	trace_mc_event(type, msg, label, error_count,
 		       mci->mc_idx, top_layer, mid_layer, low_layer,
@@ -1206,16 +1214,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
 			page_frame_number, offset_in_page,
 			grain, syndrome);
-		edac_ce_error(mci, pos, msg, location, label, detail,
-			      other_detail, enable_per_layer_report,
+		edac_ce_error(mci, error_count, pos, msg, location, label,
+			      detail, other_detail, enable_per_layer_report,
 			      page_frame_number, offset_in_page, grain);
 	} else {
 		snprintf(detail, sizeof(detail),
 			"page:0x%lx offset:0x%lx grain:%ld",
 			page_frame_number, offset_in_page, grain);
 
-		edac_ue_error(mci, pos, msg, location, label, detail,
-			      other_detail, enable_per_layer_report);
+		edac_ue_error(mci, error_count, pos, msg, location, label,
+			      detail, other_detail, enable_per_layer_report);
 	}
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
-- 
cgit v1.2.3