From b6cec8aa4a799d1e146095f0ba52454710f5ede4 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 25 Apr 2012 17:42:35 +0000 Subject: RDMA/cma: Fix lockdep false positive recursive locking The following lockdep problem was reported by Or Gerlitz : [ INFO: possible recursive locking detected ] 3.3.0-32035-g1b2649e-dirty #4 Not tainted --------------------------------------------- kworker/5:1/418 is trying to acquire lock: (&id_priv->handler_mutex){+.+.+.}, at: [] rdma_destroy_i d+0x33/0x1f0 [rdma_cm] but task is already holding lock: (&id_priv->handler_mutex){+.+.+.}, at: [] cma_disable_ca llback+0x24/0x45 [rdma_cm] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&id_priv->handler_mutex); lock(&id_priv->handler_mutex); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/5:1/418: #0: (ib_cm){.+.+.+}, at: [] process_one_work+0x210/0x4a 6 #1: ((&(&work->work)->work)){+.+.+.}, at: [] process_on e_work+0x210/0x4a6 #2: (&id_priv->handler_mutex){+.+.+.}, at: [] cma_disab le_callback+0x24/0x45 [rdma_cm] stack backtrace: Pid: 418, comm: kworker/5:1 Not tainted 3.3.0-32035-g1b2649e-dirty #4 Call Trace: [] ? console_unlock+0x1f4/0x204 [] __lock_acquire+0x16b5/0x174e [] ? save_trace+0x3f/0xb3 [] lock_acquire+0xf0/0x116 [] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm] [] mutex_lock_nested+0x64/0x2ce [] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm] [] ? trace_hardirqs_on_caller+0x11e/0x155 [] ? trace_hardirqs_on+0xd/0xf [] rdma_destroy_id+0x33/0x1f0 [rdma_cm] [] cma_req_handler+0x418/0x644 [rdma_cm] [] cm_process_work+0x32/0x119 [ib_cm] [] cm_req_handler+0x928/0x982 [ib_cm] [] ? cm_req_handler+0x982/0x982 [ib_cm] [] cm_work_handler+0x33/0xfe5 [ib_cm] [] ? trace_hardirqs_on_caller+0x11e/0x155 [] ? cm_req_handler+0x982/0x982 [ib_cm] [] process_one_work+0x2bd/0x4a6 [] ? process_one_work+0x210/0x4a6 [] ? _raw_spin_unlock_irq+0x2b/0x40 [] worker_thread+0x1d6/0x350 [] ? rescuer_thread+0x241/0x241 [] kthread+0x84/0x8c [] kernel_thread_helper+0x4/0x10 [] ? retint_restore_args+0xe/0xe [] ? __init_kthread_worker+0x56/0x56 [] ? gs_change+0xb/0xb The actual locking is fine, since we're dealing with different locks, but from the same lock class. cma_disable_callback() acquires the listening id mutex, whereas rdma_destroy_id() acquires the mutex for the new connection id. To fix this, delay the call to rdma_destroy_id() until we've released the listening id mutex. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e3e470fecaa9..79c7eebb970f 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1218,13 +1218,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) } if (!conn_id) { ret = -ENOMEM; - goto out; + goto err1; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); ret = cma_acquire_dev(conn_id); if (ret) - goto release_conn_id; + goto err2; conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; @@ -1236,31 +1236,33 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) */ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); - if (!ret) { - /* - * Acquire mutex to prevent user executing rdma_destroy_id() - * while we're accessing the cm_id. - */ - mutex_lock(&lock); - if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) - ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); - mutex_unlock(&lock); - mutex_unlock(&conn_id->handler_mutex); - cma_deref_id(conn_id); - goto out; - } + if (ret) + goto err3; + + /* + * Acquire mutex to prevent user executing rdma_destroy_id() + * while we're accessing the cm_id. + */ + mutex_lock(&lock); + if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) + ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + mutex_unlock(&lock); + mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); + return 0; +err3: + cma_deref_id(conn_id); /* Destroy the CM ID by returning a non-zero value. */ conn_id->cm_id.ib = NULL; - -release_conn_id: +err2: cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(&conn_id->id); - -out: +err1: mutex_unlock(&listen_id->handler_mutex); + if (conn_id) + rdma_destroy_id(&conn_id->id); return ret; } -- cgit v1.2.3