summaryrefslogtreecommitdiffstats
path: root/drivers/cxl
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2023-12-06 19:11:14 -0800
committerDan Williams <dan.j.williams@intel.com>2023-12-07 19:14:04 -0800
commit6f5c4eca48ffe18307b4e1d375817691c9005c87 (patch)
tree7752bb3d6e50aa9295146effbbb4f8adf3a04e3c /drivers/cxl
parent659aa050a53817157b7459529538598a6449c1d3 (diff)
downloadlinux-6f5c4eca48ffe18307b4e1d375817691c9005c87.tar.gz
linux-6f5c4eca48ffe18307b4e1d375817691c9005c87.tar.bz2
linux-6f5c4eca48ffe18307b4e1d375817691c9005c87.zip
cxl/hdm: Fix dpa translation locking
The helper, cxl_dpa_resource_start(), snapshots the dpa-address of an endpoint-decoder after acquiring the cxl_dpa_rwsem. However, it is sufficient to assert that cxl_dpa_rwsem is held rather than acquire it in the helper. Otherwise, it triggers multiple lockdep reports: 1/ Tracing callbacks are in an atomic context that can not acquire sleeping locks: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1525 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1288, name: bash preempt_count: 2, expected: 0 RCU nest depth: 0, expected: 0 [..] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS edk2-20230524-3.fc38 05/24/2023 Call Trace: <TASK> dump_stack_lvl+0x71/0x90 __might_resched+0x1b2/0x2c0 down_read+0x1a/0x190 cxl_dpa_resource_start+0x15/0x50 [cxl_core] cxl_trace_hpa+0x122/0x300 [cxl_core] trace_event_raw_event_cxl_poison+0x1c9/0x2d0 [cxl_core] 2/ The rwsem is already held in the inject poison path: WARNING: possible recursive locking detected 6.7.0-rc2+ #12 Tainted: G W OE N -------------------------------------------- bash/1288 is trying to acquire lock: ffffffffc05f73d0 (cxl_dpa_rwsem){++++}-{3:3}, at: cxl_dpa_resource_start+0x15/0x50 [cxl_core] but task is already holding lock: ffffffffc05f73d0 (cxl_dpa_rwsem){++++}-{3:3}, at: cxl_inject_poison+0x7d/0x1e0 [cxl_core] [..] Call Trace: <TASK> dump_stack_lvl+0x71/0x90 __might_resched+0x1b2/0x2c0 down_read+0x1a/0x190 cxl_dpa_resource_start+0x15/0x50 [cxl_core] cxl_trace_hpa+0x122/0x300 [cxl_core] trace_event_raw_event_cxl_poison+0x1c9/0x2d0 [cxl_core] __traceiter_cxl_poison+0x5c/0x80 [cxl_core] cxl_inject_poison+0x1bc/0x1e0 [cxl_core] This appears to have been an issue since the initial implementation and uncovered by the new cxl-poison.sh test [1]. That test is now passing with these changes. Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events") Link: http://lore.kernel.org/r/e4f2716646918135ddbadf4146e92abb659de734.1700615159.git.alison.schofield@intel.com [1] Cc: <stable@vger.kernel.org> Cc: Alison Schofield <alison.schofield@intel.com> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com> Cc: Dave Jiang <dave.jiang@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/cxl')
-rw-r--r--drivers/cxl/core/hdm.c3
-rw-r--r--drivers/cxl/core/port.c4
2 files changed, 3 insertions, 4 deletions
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 529baa8a1759..7d97790b893d 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -363,10 +363,9 @@ resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
{
resource_size_t base = -1;
- down_read(&cxl_dpa_rwsem);
+ lockdep_assert_held(&cxl_dpa_rwsem);
if (cxled->dpa_res)
base = cxled->dpa_res->start;
- up_read(&cxl_dpa_rwsem);
return base;
}
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 38441634e4c6..b7c93bb18f6e 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -226,9 +226,9 @@ static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *at
char *buf)
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
- u64 base = cxl_dpa_resource_start(cxled);
- return sysfs_emit(buf, "%#llx\n", base);
+ guard(rwsem_read)(&cxl_dpa_rwsem);
+ return sysfs_emit(buf, "%#llx\n", (u64)cxl_dpa_resource_start(cxled));
}
static DEVICE_ATTR_RO(dpa_resource);