summaryrefslogtreecommitdiffstats
path: root/include/linux/edac.h
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <mchehab@redhat.com>2012-04-16 16:41:11 -0300
committerMauro Carvalho Chehab <mchehab@redhat.com>2012-06-11 13:23:30 -0300
commit7a623c039075e4ea21648d88133fafa6dcfd113d (patch)
tree125ecf3c32161ca5242237ce177c79109b0a8caa /include/linux/edac.h
parentb0610bb82abd1c4ac97c33f0312cd7fd72eaa325 (diff)
downloadlinux-stable-7a623c039075e4ea21648d88133fafa6dcfd113d.tar.gz
linux-stable-7a623c039075e4ea21648d88133fafa6dcfd113d.tar.bz2
linux-stable-7a623c039075e4ea21648d88133fafa6dcfd113d.zip
edac: rewrite the sysfs code to use struct device
The EDAC subsystem uses the old struct sysdev approach, creating all nodes using the raw sysfs API. This is bad, as the API is deprecated. As we'll be changing the EDAC API, let's first port the existing code to struct device. There's one drawback on this patch: driver-specific sysfs nodes, used by mpc85xx_edac, amd64_edac and i7core_edac won't be created anymore. While it would be possible to also port the device-specific code, that would mix kobj with struct device, with is not recommended. Also, it is easier and nicer to move the code to the drivers, instead, as the core can get rid of some complex logic that just emulates what the device_add() and device_create_file() already does. The next patches will convert the driver-specific code to use the device-specific calls. Then, the remaining bits of the old sysfs API will be removed. NOTE: a per-MC bus is required, otherwise devices with more than one memory controller will hit a bug like the one below: [ 819.094946] EDAC DEBUG: find_mci_by_dev: find_mci_by_dev() [ 819.094948] EDAC DEBUG: edac_create_sysfs_mci_device: edac_create_sysfs_mci_device() idx=1 [ 819.094952] EDAC DEBUG: edac_create_sysfs_mci_device: edac_create_sysfs_mci_device(): creating device mc1 [ 819.094967] EDAC DEBUG: edac_create_sysfs_mci_device: edac_create_sysfs_mci_device creating dimm0, located at channel 0 slot 0 [ 819.094984] ------------[ cut here ]------------ [ 819.100142] WARNING: at fs/sysfs/dir.c:481 sysfs_add_one+0xc1/0xf0() [ 819.107282] Hardware name: S2600CP [ 819.111078] sysfs: cannot create duplicate filename '/bus/edac/devices/dimm0' [ 819.119062] Modules linked in: sb_edac(+) edac_core ip6table_filter ip6_tables ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables bridge stp llc sunrpc binfmt_misc dm_mirror dm_region_hash dm_log vhost_net macvtap macvlan tun kvm microcode pcspkr iTCO_wdt iTCO_vendor_support igb i2c_i801 i2c_core sg ioatdma dca sr_mod cdrom sd_mod crc_t10dif ahci libahci isci libsas libata scsi_transport_sas scsi_mod wmi dm_mod [last unloaded: scsi_wait_scan] [ 819.175748] Pid: 10902, comm: modprobe Not tainted 3.3.0-0.11.el7.v12.2.x86_64 #1 [ 819.184113] Call Trace: [ 819.186868] [<ffffffff8105adaf>] warn_slowpath_common+0x7f/0xc0 [ 819.193573] [<ffffffff8105aea6>] warn_slowpath_fmt+0x46/0x50 [ 819.200000] [<ffffffff811f53d1>] sysfs_add_one+0xc1/0xf0 [ 819.206025] [<ffffffff811f5cf5>] sysfs_do_create_link+0x135/0x220 [ 819.212944] [<ffffffff811f7023>] ? sysfs_create_group+0x13/0x20 [ 819.219656] [<ffffffff811f5df3>] sysfs_create_link+0x13/0x20 [ 819.226109] [<ffffffff813b04f6>] bus_add_device+0xe6/0x1b0 [ 819.232350] [<ffffffff813ae7cb>] device_add+0x2db/0x460 [ 819.238300] [<ffffffffa0325634>] edac_create_dimm_object+0x84/0xf0 [edac_core] [ 819.246460] [<ffffffffa0325e18>] edac_create_sysfs_mci_device+0xe8/0x290 [edac_core] [ 819.255215] [<ffffffffa0322e2a>] edac_mc_add_mc+0x5a/0x2c0 [edac_core] [ 819.262611] [<ffffffffa03412df>] sbridge_register_mci+0x1bc/0x279 [sb_edac] [ 819.270493] [<ffffffffa03417a3>] sbridge_probe+0xef/0x175 [sb_edac] [ 819.277630] [<ffffffff813ba4e8>] ? pm_runtime_enable+0x58/0x90 [ 819.284268] [<ffffffff812f430c>] local_pci_probe+0x5c/0xd0 [ 819.290508] [<ffffffff812f5ba1>] __pci_device_probe+0xf1/0x100 [ 819.297117] [<ffffffff812f5bea>] pci_device_probe+0x3a/0x60 [ 819.303457] [<ffffffff813b1003>] really_probe+0x73/0x270 [ 819.309496] [<ffffffff813b138e>] driver_probe_device+0x4e/0xb0 [ 819.316104] [<ffffffff813b149b>] __driver_attach+0xab/0xb0 [ 819.322337] [<ffffffff813b13f0>] ? driver_probe_device+0xb0/0xb0 [ 819.329151] [<ffffffff813af5d6>] bus_for_each_dev+0x56/0x90 [ 819.335489] [<ffffffff813b0d7e>] driver_attach+0x1e/0x20 [ 819.341534] [<ffffffff813b0980>] bus_add_driver+0x1b0/0x2a0 [ 819.347884] [<ffffffffa0347000>] ? 0xffffffffa0346fff [ 819.353641] [<ffffffff813b19f6>] driver_register+0x76/0x140 [ 819.359980] [<ffffffff8159f18b>] ? printk+0x51/0x53 [ 819.365524] [<ffffffffa0347000>] ? 0xffffffffa0346fff [ 819.371291] [<ffffffff812f5896>] __pci_register_driver+0x56/0xd0 [ 819.378096] [<ffffffffa0347054>] sbridge_init+0x54/0x1000 [sb_edac] [ 819.385231] [<ffffffff8100203f>] do_one_initcall+0x3f/0x170 [ 819.391577] [<ffffffff810bcd2e>] sys_init_module+0xbe/0x230 [ 819.397926] [<ffffffff815bb529>] system_call_fastpath+0x16/0x1b [ 819.404633] ---[ end trace 1654fdd39556689f ]--- This happens because the bus is not being properly initialized. Instead of putting the memory sub-devices inside the memory controller, it is putting everything under the same directory: $ tree /sys/bus/edac/ /sys/bus/edac/ ├── devices │ ├── all_channel_counts -> ../../../devices/system/edac/mc/mc0/all_channel_counts │ ├── csrow0 -> ../../../devices/system/edac/mc/mc0/csrow0 │ ├── csrow1 -> ../../../devices/system/edac/mc/mc0/csrow1 │ ├── csrow2 -> ../../../devices/system/edac/mc/mc0/csrow2 │ ├── dimm0 -> ../../../devices/system/edac/mc/mc0/dimm0 │ ├── dimm1 -> ../../../devices/system/edac/mc/mc0/dimm1 │ ├── dimm3 -> ../../../devices/system/edac/mc/mc0/dimm3 │ ├── dimm6 -> ../../../devices/system/edac/mc/mc0/dimm6 │ ├── inject_addrmatch -> ../../../devices/system/edac/mc/mc0/inject_addrmatch │ ├── mc -> ../../../devices/system/edac/mc │ └── mc0 -> ../../../devices/system/edac/mc/mc0 ├── drivers ├── drivers_autoprobe ├── drivers_probe └── uevent On a multi-memory controller system, the names "csrow%d" and "dimm%d" should be under "mc%d", and not at the main hierarchy level. So, we need to create a per-MC bus, in order to have its own namespace. Reviewed-by: Aristeu Rozanski <arozansk@redhat.com> Cc: Doug Thompson <norsk5@yahoo.com> Cc: Greg K H <gregkh@linuxfoundation.org> Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'include/linux/edac.h')
-rw-r--r--include/linux/edac.h47
1 files changed, 33 insertions, 14 deletions
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 4e32e8d31e0a..a2b0b6fc002c 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -13,6 +13,7 @@
#define _LINUX_EDAC_H_
#include <linux/atomic.h>
+#include <linux/device.h>
#include <linux/kobject.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
@@ -448,14 +449,15 @@ struct edac_mc_layer {
__p; \
})
-
-/* FIXME: add the proper per-location error counts */
struct dimm_info {
+ struct device dev;
+
char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
/* Memory location data */
unsigned location[EDAC_MAX_LAYERS];
+ struct kobject kobj; /* sysfs kobject for this csrow */
struct mem_ctl_info *mci; /* the parent */
u32 grain; /* granularity of reported error in bytes */
@@ -484,6 +486,8 @@ struct dimm_info {
* patches in this series will fix this issue.
*/
struct rank_info {
+ struct device dev;
+
int chan_idx;
struct csrow_info *csrow;
struct dimm_info *dimm;
@@ -492,6 +496,8 @@ struct rank_info {
};
struct csrow_info {
+ struct device dev;
+
/* Used only by edac_mc_find_csrow_by_page() */
unsigned long first_page; /* first page number in csrow */
unsigned long last_page; /* last page number in csrow */
@@ -517,15 +523,6 @@ struct mcidev_sysfs_group {
const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
};
-struct mcidev_sysfs_group_kobj {
- struct list_head list; /* list for all instances within a mc */
-
- struct kobject kobj; /* kobj for the group */
-
- const struct mcidev_sysfs_group *grp; /* group description table */
- struct mem_ctl_info *mci; /* the parent */
-};
-
/* mcidev_sysfs_attribute structure
* used for driver sysfs attributes and in mem_ctl_info
* sysfs top level entries
@@ -536,13 +533,27 @@ struct mcidev_sysfs_attribute {
const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */
/* Ops for show/store values at the attribute - not used on group */
- ssize_t (*show)(struct mem_ctl_info *,char *);
- ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
+ ssize_t (*show)(struct mem_ctl_info *, char *);
+ ssize_t (*store)(struct mem_ctl_info *, const char *, size_t);
+
+ void *priv;
+};
+
+/*
+ * struct errcount_attribute - used to store the several error counts
+ */
+struct errcount_attribute_data {
+ int n_layers;
+ int pos[EDAC_MAX_LAYERS];
+ int layer0, layer1, layer2;
};
/* MEMORY controller information structure
*/
struct mem_ctl_info {
+ struct device dev;
+ struct bus_type bus;
+
struct list_head link; /* for global list of mem_ctl_info structs */
struct module *owner; /* Module owner of this control struct */
@@ -587,7 +598,15 @@ struct mem_ctl_info {
struct csrow_info *csrows;
unsigned nr_csrows, num_cschannel;
- /* Memory Controller hierarchy */
+ /*
+ * Memory Controller hierarchy
+ *
+ * There are basically two types of memory controller: the ones that
+ * sees memory sticks ("dimms"), and the ones that sees memory ranks.
+ * All old memory controllers enumerate memories per rank, but most
+ * of the recent drivers enumerate memories per DIMM, instead.
+ * When the memory controller is per rank, mem_is_per_rank is true.
+ */
unsigned n_layers;
struct edac_mc_layer *layers;
bool mem_is_per_rank;