summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael J. Ruhl <michael.j.ruhl@intel.com>2018-08-15 22:54:49 -0700
committerJason Gunthorpe <jgg@mellanox.com>2018-08-20 10:45:48 -0600
commitc513de490f808d8480346f9a58e6a4a5f3de12e7 (patch)
tree13d6a4f14b02dbc078b1a4f515cac232267083fe
parentb4c296f9c96420b8e7e92466ea5960f10ee20aae (diff)
downloadlinux-stable-c513de490f808d8480346f9a58e6a4a5f3de12e7.tar.gz
linux-stable-c513de490f808d8480346f9a58e6a4a5f3de12e7.tar.bz2
linux-stable-c513de490f808d8480346f9a58e6a4a5f3de12e7.zip
IB/hfi1: Invalid NUMA node information can cause a divide by zero
If the system BIOS does not supply NUMA node information to the PCI devices, the NUMA node is selected by choosing the current node. This can lead to the following crash: divide error: 0000 SMP CPU: 0 PID: 4 Comm: kworker/0:0 Tainted: G IOE ------------ 3.10.0-693.21.1.el7.x86_64 #1 Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.01.01.0005.101720141054 10/17/2014 Workqueue: events work_for_cpu_fn task: ffff880174480fd0 ti: ffff880174488000 task.ti: ffff880174488000 RIP: 0010: [<ffffffffc020ac69>] hfi1_dev_affinity_init+0x129/0x6a0 [hfi1] RSP: 0018:ffff88017448bbf8 EFLAGS: 00010246 RAX: 0000000000000011 RBX: ffff88107ffba6c0 RCX: ffff88085c22e130 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880824ad0000 RBP: ffff88017448bc48 R08: 0000000000000011 R09: 0000000000000002 R10: ffff8808582b6ca0 R11: 0000000000003151 R12: ffff8808582b6ca0 R13: ffff8808582b6518 R14: ffff8808582b6010 R15: 0000000000000012 FS: 0000000000000000(0000) GS:ffff88085ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007efc707404f0 CR3: 0000000001a02000 CR4: 00000000001607f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: hfi1_init_dd+0x14b3/0x27a0 [hfi1] ? pcie_capability_write_word+0x46/0x70 ? hfi1_pcie_init+0xc0/0x200 [hfi1] do_init_one+0x153/0x4c0 [hfi1] ? sched_clock_cpu+0x85/0xc0 init_one+0x1b5/0x260 [hfi1] local_pci_probe+0x4a/0xb0 work_for_cpu_fn+0x1a/0x30 process_one_work+0x17f/0x440 worker_thread+0x278/0x3c0 ? manage_workers.isra.24+0x2a0/0x2a0 kthread+0xd1/0xe0 ? insert_kthread_work+0x40/0x40 ret_from_fork+0x77/0xb0 ? insert_kthread_work+0x40/0x40 If the BIOS is not supplying NUMA information: - set the default table count to 1 for all possible nodes - select node 0 (instead of current NUMA) node to get consistent performance - generate an error indicating that the BIOS should be upgraded Reviewed-by: Gary Leshner <gary.s.leshner@intel.com> Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c24
1 files changed, 21 insertions, 3 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index fbe7198a715a..bedd5fba33b0 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -198,7 +198,7 @@ int node_affinity_init(void)
while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
node = pcibus_to_node(dev->bus);
if (node < 0)
- node = numa_node_id();
+ goto out;
hfi1_per_node_cntr[node]++;
}
@@ -206,6 +206,18 @@ int node_affinity_init(void)
}
return 0;
+
+out:
+ /*
+ * Invalid PCI NUMA node information found, note it, and populate
+ * our database 1:1.
+ */
+ pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n");
+ pr_err("HFI: System BIOS may need to be upgraded\n");
+ for (node = 0; node < node_affinity.num_possible_nodes; node++)
+ hfi1_per_node_cntr[node] = 1;
+
+ return 0;
}
static void node_affinity_destroy(struct hfi1_affinity_node *entry)
@@ -622,8 +634,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
int curr_cpu, possible, i, ret;
bool new_entry = false;
- if (node < 0)
- node = numa_node_id();
+ /*
+ * If the BIOS does not have the NUMA node information set, select
+ * NUMA 0 so we get consistent performance.
+ */
+ if (node < 0) {
+ dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+ node = 0;
+ }
dd->node = node;
local_mask = cpumask_of_node(dd->node);