summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/sw
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-29 17:21:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-29 17:21:24 -0700
commitaf3877265dd88d7e333f94fb37bc09554544adca (patch)
tree3f84b1a5d1e052039f510cece55f1c06e580ea8b /drivers/infiniband/sw
parent1ae78a14516b9372e4c90a89ac21b259339a3a3a (diff)
parent531094dc7164718d28ebb581d729807d7e846363 (diff)
downloadlinux-stable-af3877265dd88d7e333f94fb37bc09554544adca.tar.gz
linux-stable-af3877265dd88d7e333f94fb37bc09554544adca.tar.bz2
linux-stable-af3877265dd88d7e333f94fb37bc09554544adca.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "Usual wide collection of unrelated items in drivers: - Driver bug fixes and treewide cleanups in hfi1, siw, qib, mlx5, rxe, usnic, usnic, bnxt_re, ocrdma, iser: - remove unnecessary NULL checks - kmap obsolescence - pci_enable_pcie_error_reporting() obsolescence - unused variables and macros - trace event related warnings - casting warnings - Code cleanups for irdm and erdma - EFA reporting of 128 byte PCIe TLP support - mlx5 more agressively uses the out of order HW feature - Big rework of how state machines and tasks work in rxe - Fix a syzkaller found crash netdev refcount leak in siw - bnxt_re revises their HW description header - Congestion control for bnxt_re - Use mmu_notifiers more safely in hfi1 - mlx5 gets better support for PCIe relaxed ordering inside VMs" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (81 commits) RDMA/efa: Add rdma write capability to device caps RDMA/mlx5: Use correct device num_ports when modify DC RDMA/irdma: Drop spurious WQ_UNBOUND from alloc_ordered_workqueue() call RDMA/rxe: Fix spinlock recursion deadlock on requester RDMA/mlx5: Fix flow counter query via DEVX RDMA/rxe: Protect QP state with qp->state_lock RDMA/rxe: Move code to check if drained to subroutine RDMA/rxe: Remove qp->req.state RDMA/rxe: Remove qp->comp.state RDMA/rxe: Remove qp->resp.state RDMA/mlx5: Allow relaxed ordering read in VFs and VMs net/mlx5: Update relaxed ordering read HCA capabilities RDMA/mlx5: Check pcie_relaxed_ordering_enabled() in UMR RDMA/mlx5: Remove pcie_relaxed_ordering_enabled() check for RO write RDMA: Add ib_virt_dma_to_page() RDMA/rxe: Fix the error "trying to register non-static key in rxe_cleanup_task" RDMA/irdma: Slightly optimize irdma_form_ah_cm_frame() RDMA/rxe: Fix incorrect TASKLET_STATE_SCHED check in rxe_task.c IB/hfi1: Place struct mmu_rb_handler on cache line start IB/hfi1: Fix bugs with non-PAGE_SIZE-end multi-iovec user SDMA requests ...
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c16
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h46
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c161
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c39
-rw-r--r--drivers/infiniband/sw/rxe/rxe_icrc.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mmap.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c11
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c263
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.c5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c15
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c104
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c126
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c268
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.h23
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c981
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h14
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c3
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c6
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c21
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c4
24 files changed, 1369 insertions, 793 deletions
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 9b4c0389d2c0..dc83d0ac6a38 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -464,8 +464,6 @@ void rvt_qp_exit(struct rvt_dev_info *rdi)
if (qps_inuse)
rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
qps_inuse);
- if (!rdi->qp_dev)
- return;
kfree(rdi->qp_dev->qp_table);
free_qpn_table(&rdi->qp_dev->qpn_table);
@@ -2040,7 +2038,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
wqe = rvt_get_swqe_ptr(qp, qp->s_head);
/* cplen has length from above */
- memcpy(&wqe->wr, wr, cplen);
+ memcpy(&wqe->ud_wr, wr, cplen);
wqe->length = 0;
j = 0;
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 136c2efe3466..7a7e713de52d 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -160,6 +160,8 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
port->attr.active_mtu = mtu;
port->mtu_cap = ib_mtu_enum_to_int(mtu);
+
+ rxe_info_dev(rxe, "Set mtu to %d", port->mtu_cap);
}
/* called by ifc layer to create new rxe device.
@@ -175,26 +177,26 @@ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name)
static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
{
- struct rxe_dev *exists;
+ struct rxe_dev *rxe;
int err = 0;
if (is_vlan_dev(ndev)) {
- pr_err("rxe creation allowed on top of a real device only\n");
+ rxe_err("rxe creation allowed on top of a real device only");
err = -EPERM;
goto err;
}
- exists = rxe_get_dev_from_net(ndev);
- if (exists) {
- ib_device_put(&exists->ib_dev);
- rxe_dbg(exists, "already configured on %s\n", ndev->name);
+ rxe = rxe_get_dev_from_net(ndev);
+ if (rxe) {
+ ib_device_put(&rxe->ib_dev);
+ rxe_err_dev(rxe, "already configured on %s", ndev->name);
err = -EEXIST;
goto err;
}
err = rxe_net_add(ibdev_name, ndev);
if (err) {
- rxe_dbg(exists, "failed to add %s\n", ndev->name);
+ rxe_err("failed to add %s\n", ndev->name);
goto err;
}
err:
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 2415f3704f57..d33dd6cf83d3 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -38,7 +38,8 @@
#define RXE_ROCE_V2_SPORT (0xc000)
-#define rxe_dbg(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \
+#define rxe_dbg(fmt, ...) pr_debug("%s: " fmt "\n", __func__, ##__VA_ARGS__)
+#define rxe_dbg_dev(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \
"%s: " fmt, __func__, ##__VA_ARGS__)
#define rxe_dbg_uc(uc, fmt, ...) ibdev_dbg((uc)->ibuc.device, \
"uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__)
@@ -57,6 +58,48 @@
#define rxe_dbg_mw(mw, fmt, ...) ibdev_dbg((mw)->ibmw.device, \
"mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_err(fmt, ...) pr_err_ratelimited("%s: " fmt "\n", __func__, \
+ ##__VA_ARGS__)
+#define rxe_err_dev(rxe, fmt, ...) ibdev_err_ratelimited(&(rxe)->ib_dev, \
+ "%s: " fmt, __func__, ##__VA_ARGS__)
+#define rxe_err_uc(uc, fmt, ...) ibdev_err_ratelimited((uc)->ibuc.device, \
+ "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_err_pd(pd, fmt, ...) ibdev_err_ratelimited((pd)->ibpd.device, \
+ "pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_err_ah(ah, fmt, ...) ibdev_err_ratelimited((ah)->ibah.device, \
+ "ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_err_srq(srq, fmt, ...) ibdev_err_ratelimited((srq)->ibsrq.device, \
+ "srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_err_qp(qp, fmt, ...) ibdev_err_ratelimited((qp)->ibqp.device, \
+ "qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_err_cq(cq, fmt, ...) ibdev_err_ratelimited((cq)->ibcq.device, \
+ "cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_err_mr(mr, fmt, ...) ibdev_err_ratelimited((mr)->ibmr.device, \
+ "mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_err_mw(mw, fmt, ...) ibdev_err_ratelimited((mw)->ibmw.device, \
+ "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)
+
+#define rxe_info(fmt, ...) pr_info_ratelimited("%s: " fmt "\n", __func__, \
+ ##__VA_ARGS__)
+#define rxe_info_dev(rxe, fmt, ...) ibdev_info_ratelimited(&(rxe)->ib_dev, \
+ "%s: " fmt, __func__, ##__VA_ARGS__)
+#define rxe_info_uc(uc, fmt, ...) ibdev_info_ratelimited((uc)->ibuc.device, \
+ "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_info_pd(pd, fmt, ...) ibdev_info_ratelimited((pd)->ibpd.device, \
+ "pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_info_ah(ah, fmt, ...) ibdev_info_ratelimited((ah)->ibah.device, \
+ "ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_info_srq(srq, fmt, ...) ibdev_info_ratelimited((srq)->ibsrq.device, \
+ "srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_info_qp(qp, fmt, ...) ibdev_info_ratelimited((qp)->ibqp.device, \
+ "qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_info_cq(cq, fmt, ...) ibdev_info_ratelimited((cq)->ibcq.device, \
+ "cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_info_mr(mr, fmt, ...) ibdev_info_ratelimited((mr)->ibmr.device, \
+ "mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_info_mw(mw, fmt, ...) ibdev_info_ratelimited((mw)->ibmw.device, \
+ "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)
+
/* responder states */
enum resp_states {
RESPST_NONE,
@@ -90,7 +133,6 @@ enum resp_states {
RESPST_ERR_LENGTH,
RESPST_ERR_CQ_OVERFLOW,
RESPST_ERROR,
- RESPST_RESET,
RESPST_DONE,
RESPST_EXIT,
};
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 20737fec392b..db18ace74d2b 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -118,10 +118,12 @@ void retransmit_timer(struct timer_list *t)
rxe_dbg_qp(qp, "retransmit timer fired\n");
+ spin_lock_bh(&qp->state_lock);
if (qp->valid) {
qp->comp.timeout = 1;
rxe_sched_task(&qp->comp.task);
}
+ spin_unlock_bh(&qp->state_lock);
}
void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
@@ -322,7 +324,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
qp->comp.psn = pkt->psn;
if (qp->req.wait_psn) {
qp->req.wait_psn = 0;
- rxe_run_task(&qp->req.task);
+ rxe_sched_task(&qp->req.task);
}
}
return COMPST_ERROR_RETRY;
@@ -428,6 +430,10 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
uwc->wc_flags = IB_WC_WITH_IMM;
uwc->byte_len = wqe->dma.length;
}
+ } else {
+ if (wqe->status != IB_WC_WR_FLUSH_ERR)
+ rxe_err_qp(qp, "non-flush error status = %d",
+ wqe->status);
}
}
@@ -469,30 +475,16 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
*/
if (qp->req.wait_fence) {
qp->req.wait_fence = 0;
- rxe_run_task(&qp->req.task);
+ rxe_sched_task(&qp->req.task);
}
}
-static inline enum comp_state complete_ack(struct rxe_qp *qp,
- struct rxe_pkt_info *pkt,
- struct rxe_send_wqe *wqe)
+static void comp_check_sq_drain_done(struct rxe_qp *qp)
{
- if (wqe->has_rd_atomic) {
- wqe->has_rd_atomic = 0;
- atomic_inc(&qp->req.rd_atomic);
- if (qp->req.need_rd_atomic) {
- qp->comp.timeout_retry = 0;
- qp->req.need_rd_atomic = 0;
- rxe_run_task(&qp->req.task);
- }
- }
-
- if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
- /* state_lock used by requester & completer */
- spin_lock_bh(&qp->state_lock);
- if ((qp->req.state == QP_STATE_DRAIN) &&
- (qp->comp.psn == qp->req.psn)) {
- qp->req.state = QP_STATE_DRAINED;
+ spin_lock_bh(&qp->state_lock);
+ if (unlikely(qp_state(qp) == IB_QPS_SQD)) {
+ if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) {
+ qp->attr.sq_draining = 0;
spin_unlock_bh(&qp->state_lock);
if (qp->ibqp.event_handler) {
@@ -504,10 +496,27 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
qp->ibqp.event_handler(&ev,
qp->ibqp.qp_context);
}
- } else {
- spin_unlock_bh(&qp->state_lock);
+ return;
}
}
+ spin_unlock_bh(&qp->state_lock);
+}
+
+static inline enum comp_state complete_ack(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt,
+ struct rxe_send_wqe *wqe)
+{
+ if (wqe->has_rd_atomic) {
+ wqe->has_rd_atomic = 0;
+ atomic_inc(&qp->req.rd_atomic);
+ if (qp->req.need_rd_atomic) {
+ qp->comp.timeout_retry = 0;
+ qp->req.need_rd_atomic = 0;
+ rxe_sched_task(&qp->req.task);
+ }
+ }
+
+ comp_check_sq_drain_done(qp);
do_complete(qp, wqe);
@@ -538,25 +547,60 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
return COMPST_GET_WQE;
}
-static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
+/* drain incoming response packet queue */
+static void drain_resp_pkts(struct rxe_qp *qp)
{
struct sk_buff *skb;
- struct rxe_send_wqe *wqe;
- struct rxe_queue *q = qp->sq.queue;
while ((skb = skb_dequeue(&qp->resp_pkts))) {
rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
+}
+
+/* complete send wqe with flush error */
+static int flush_send_wqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+{
+ struct rxe_cqe cqe = {};
+ struct ib_wc *wc = &cqe.ibwc;
+ struct ib_uverbs_wc *uwc = &cqe.uibwc;
+ int err;
+
+ if (qp->is_user) {
+ uwc->wr_id = wqe->wr.wr_id;
+ uwc->status = IB_WC_WR_FLUSH_ERR;
+ uwc->qp_num = qp->ibqp.qp_num;
+ } else {
+ wc->wr_id = wqe->wr.wr_id;
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->qp = &qp->ibqp;
+ }
+
+ err = rxe_cq_post(qp->scq, &cqe, 0);
+ if (err)
+ rxe_dbg_cq(qp->scq, "post cq failed, err = %d", err);
+
+ return err;
+}
+
+/* drain and optionally complete the send queue
+ * if unable to complete a wqe, i.e. cq is full, stop
+ * completing and flush the remaining wqes
+ */
+static void flush_send_queue(struct rxe_qp *qp, bool notify)
+{
+ struct rxe_send_wqe *wqe;
+ struct rxe_queue *q = qp->sq.queue;
+ int err;
while ((wqe = queue_head(q, q->type))) {
if (notify) {
- wqe->status = IB_WC_WR_FLUSH_ERR;
- do_complete(qp, wqe);
- } else {
- queue_advance_consumer(q, q->type);
+ err = flush_send_wqe(qp, wqe);
+ if (err)
+ notify = 0;
}
+ queue_advance_consumer(q, q->type);
}
}
@@ -571,9 +615,28 @@ static void free_pkt(struct rxe_pkt_info *pkt)
ib_device_put(dev);
}
-int rxe_completer(void *arg)
+/* reset the retry timer if
+ * - QP is type RC
+ * - there is a packet sent by the requester that
+ * might be acked (we still might get spurious
+ * timeouts but try to keep them as few as possible)
+ * - the timeout parameter is set
+ * - the QP is alive
+ */
+static void reset_retry_timer(struct rxe_qp *qp)
+{
+ if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) {
+ spin_lock_bh(&qp->state_lock);
+ if (qp_state(qp) >= IB_QPS_RTS &&
+ psn_compare(qp->req.psn, qp->comp.psn) > 0)
+ mod_timer(&qp->retrans_timer,
+ jiffies + qp->qp_timeout_jiffies);
+ spin_unlock_bh(&qp->state_lock);
+ }
+}
+
+int rxe_completer(struct rxe_qp *qp)
{
- struct rxe_qp *qp = (struct rxe_qp *)arg;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_send_wqe *wqe = NULL;
struct sk_buff *skb = NULL;
@@ -581,15 +644,17 @@ int rxe_completer(void *arg)
enum comp_state state;
int ret;
- if (!rxe_get(qp))
- return -EAGAIN;
+ spin_lock_bh(&qp->state_lock);
+ if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
+ qp_state(qp) == IB_QPS_RESET) {
+ bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
- if (!qp->valid || qp->comp.state == QP_STATE_ERROR ||
- qp->comp.state == QP_STATE_RESET) {
- rxe_drain_resp_pkts(qp, qp->valid &&
- qp->comp.state == QP_STATE_ERROR);
+ drain_resp_pkts(qp);
+ flush_send_queue(qp, notify);
+ spin_unlock_bh(&qp->state_lock);
goto exit;
}
+ spin_unlock_bh(&qp->state_lock);
if (qp->comp.timeout) {
qp->comp.timeout_retry = 1;
@@ -677,20 +742,7 @@ int rxe_completer(void *arg)
break;
}
- /* re reset the timeout counter if
- * (1) QP is type RC
- * (2) the QP is alive
- * (3) there is a packet sent by the requester that
- * might be acked (we still might get spurious
- * timeouts but try to keep them as few as possible)
- * (4) the timeout parameter is set
- */
- if ((qp_type(qp) == IB_QPT_RC) &&
- (qp->req.state == QP_STATE_READY) &&
- (psn_compare(qp->req.psn, qp->comp.psn) > 0) &&
- qp->qp_timeout_jiffies)
- mod_timer(&qp->retrans_timer,
- jiffies + qp->qp_timeout_jiffies);
+ reset_retry_timer(qp);
goto exit;
case COMPST_ERROR_RETRY:
@@ -730,7 +782,7 @@ int rxe_completer(void *arg)
RXE_CNT_COMP_RETRY);
qp->req.need_retry = 1;
qp->comp.started_retry = 1;
- rxe_run_task(&qp->req.task);
+ rxe_sched_task(&qp->req.task);
}
goto done;
@@ -752,6 +804,7 @@ int rxe_completer(void *arg)
*/
qp->req.wait_for_rnr_timer = 1;
rxe_dbg_qp(qp, "set rnr nak timer\n");
+ // TODO who protects from destroy_qp??
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
@@ -784,7 +837,5 @@ exit:
out:
if (pkt)
free_pkt(pkt);
- rxe_put(qp);
-
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 1df186534639..20ff0c0c4605 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -14,12 +14,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
int count;
if (cqe <= 0) {
- rxe_dbg(rxe, "cqe(%d) <= 0\n", cqe);
+ rxe_dbg_dev(rxe, "cqe(%d) <= 0\n", cqe);
goto err1;
}
if (cqe > rxe->attr.max_cqe) {
- rxe_dbg(rxe, "cqe(%d) > max_cqe(%d)\n",
+ rxe_dbg_dev(rxe, "cqe(%d) > max_cqe(%d)\n",
cqe, rxe->attr.max_cqe);
goto err1;
}
@@ -39,21 +39,6 @@ err1:
return -EINVAL;
}
-static void rxe_send_complete(struct tasklet_struct *t)
-{
- struct rxe_cq *cq = from_tasklet(cq, t, comp_task);
- unsigned long flags;
-
- spin_lock_irqsave(&cq->cq_lock, flags);
- if (cq->is_dying) {
- spin_unlock_irqrestore(&cq->cq_lock, flags);
- return;
- }
- spin_unlock_irqrestore(&cq->cq_lock, flags);
-
- cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-}
-
int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
int comp_vector, struct ib_udata *udata,
struct rxe_create_cq_resp __user *uresp)
@@ -65,7 +50,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
cq->queue = rxe_queue_init(rxe, &cqe,
sizeof(struct rxe_cqe), type);
if (!cq->queue) {
- rxe_dbg(rxe, "unable to create cq\n");
+ rxe_dbg_dev(rxe, "unable to create cq\n");
return -ENOMEM;
}
@@ -79,10 +64,6 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
cq->is_user = uresp;
- cq->is_dying = false;
-
- tasklet_setup(&cq->comp_task, rxe_send_complete);
-
spin_lock_init(&cq->cq_lock);
cq->ibcq.cqe = cqe;
return 0;
@@ -103,6 +84,7 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int cqe,
return err;
}
+/* caller holds reference to cq */
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
{
struct ib_event ev;
@@ -114,6 +96,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
if (unlikely(full)) {
+ rxe_err_cq(cq, "queue full");
spin_unlock_irqrestore(&cq->cq_lock, flags);
if (cq->ibcq.event_handler) {
ev.device = cq->ibcq.device;
@@ -135,21 +118,13 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
if ((cq->notify == IB_CQ_NEXT_COMP) ||
(cq->notify == IB_CQ_SOLICITED && solicited)) {
cq->notify = 0;
- tasklet_schedule(&cq->comp_task);
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
return 0;
}
-void rxe_cq_disable(struct rxe_cq *cq)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&cq->cq_lock, flags);
- cq->is_dying = true;
- spin_unlock_irqrestore(&cq->cq_lock, flags);
-}
-
void rxe_cq_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_cq *cq = container_of(elem, typeof(*cq), elem);
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 71bc2c189588..fdf5f08cd8f1 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -21,7 +21,7 @@ int rxe_icrc_init(struct rxe_dev *rxe)
tfm = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(tfm)) {
- rxe_dbg(rxe, "failed to init crc32 algorithm err: %ld\n",
+ rxe_dbg_dev(rxe, "failed to init crc32 algorithm err: %ld\n",
PTR_ERR(tfm));
return PTR_ERR(tfm);
}
@@ -51,7 +51,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
*(__be32 *)shash_desc_ctx(shash) = crc;
err = crypto_shash_update(shash, next, len);
if (unlikely(err)) {
- rxe_dbg(rxe, "failed crc calculation, err: %d\n", err);
+ rxe_dbg_dev(rxe, "failed crc calculation, err: %d\n", err);
return (__force __be32)crc32_le((__force u32)crc, next, len);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 1bb0cb479eb1..804b15e929dd 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -80,7 +80,6 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
-int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
void rxe_mr_cleanup(struct rxe_pool_elem *elem);
/* rxe_mw.c */
@@ -171,9 +170,9 @@ void rxe_srq_cleanup(struct rxe_pool_elem *elem);
void rxe_dealloc(struct ib_device *ib_dev);
-int rxe_completer(void *arg);
-int rxe_requester(void *arg);
-int rxe_responder(void *arg);
+int rxe_completer(struct rxe_qp *qp);
+int rxe_requester(struct rxe_qp *qp);
+int rxe_responder(struct rxe_qp *qp);
/* rxe_icrc.c */
int rxe_icrc_init(struct rxe_dev *rxe);
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index a47d72dbc537..6b7f2bd69879 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -79,7 +79,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
/* Don't allow a mmap larger than the object. */
if (size > ip->info.size) {
- rxe_dbg(rxe, "mmap region is larger than the object!\n");
+ rxe_dbg_dev(rxe, "mmap region is larger than the object!\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
@@ -87,7 +87,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
goto found_it;
}
- rxe_dbg(rxe, "unable to find pending mmap info\n");
+ rxe_dbg_dev(rxe, "unable to find pending mmap info\n");
spin_unlock_bh(&rxe->pending_lock);
ret = -EINVAL;
goto done;
@@ -98,7 +98,7 @@ found_it:
ret = remap_vmalloc_range(vma, ip->obj, 0);
if (ret) {
- rxe_dbg(rxe, "err %d from remap_vmalloc_range\n", ret);
+ rxe_dbg_dev(rxe, "err %d from remap_vmalloc_range\n", ret);
goto done;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index b10aa1580a64..0e538fafcc20 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -210,10 +210,10 @@ err1:
return err;
}
-static int rxe_set_page(struct ib_mr *ibmr, u64 iova)
+static int rxe_set_page(struct ib_mr *ibmr, u64 dma_addr)
{
struct rxe_mr *mr = to_rmr(ibmr);
- struct page *page = virt_to_page(iova & mr->page_mask);
+ struct page *page = ib_virt_dma_to_page(dma_addr);
bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
int err;
@@ -279,16 +279,16 @@ static int rxe_mr_copy_xarray(struct rxe_mr *mr, u64 iova, void *addr,
return 0;
}
-static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 iova, void *addr,
+static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 dma_addr, void *addr,
unsigned int length, enum rxe_mr_copy_dir dir)
{
- unsigned int page_offset = iova & (PAGE_SIZE - 1);
+ unsigned int page_offset = dma_addr & (PAGE_SIZE - 1);
unsigned int bytes;
struct page *page;
u8 *va;
while (length) {
- page = virt_to_page(iova & mr->page_mask);
+ page = ib_virt_dma_to_page(dma_addr);
bytes = min_t(unsigned int, length,
PAGE_SIZE - page_offset);
va = kmap_local_page(page);
@@ -300,7 +300,7 @@ static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 iova, void *addr,
kunmap_local(va);
page_offset = 0;
- iova += bytes;
+ dma_addr += bytes;
addr += bytes;
length -= bytes;
}
@@ -488,7 +488,7 @@ int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
if (mr->ibmr.type == IB_MR_TYPE_DMA) {
page_offset = iova & (PAGE_SIZE - 1);
- page = virt_to_page(iova & PAGE_MASK);
+ page = ib_virt_dma_to_page(iova);
} else {
unsigned long index;
int err;
@@ -545,7 +545,7 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
if (mr->ibmr.type == IB_MR_TYPE_DMA) {
page_offset = iova & (PAGE_SIZE - 1);
- page = virt_to_page(iova & PAGE_MASK);
+ page = ib_virt_dma_to_page(iova);
} else {
unsigned long index;
int err;
@@ -722,19 +722,6 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
return 0;
}
-int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
-{
- struct rxe_mr *mr = to_rmr(ibmr);
-
- /* See IBA 10.6.7.2.6 */
- if (atomic_read(&mr->num_mw) > 0)
- return -EINVAL;
-
- rxe_cleanup(mr);
- kfree_rcu(mr);
- return 0;
-}
-
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index e02e1624bcf4..2bc7361152ea 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -413,11 +413,14 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int is_request = pkt->mask & RXE_REQ_MASK;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- if ((is_request && (qp->req.state != QP_STATE_READY)) ||
- (!is_request && (qp->resp.state != QP_STATE_READY))) {
+ spin_lock_bh(&qp->state_lock);
+ if ((is_request && (qp_state(qp) < IB_QPS_RTS)) ||
+ (!is_request && (qp_state(qp) < IB_QPS_RTR))) {
+ spin_unlock_bh(&qp->state_lock);
rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
goto drop;
}
+ spin_unlock_bh(&qp->state_lock);
rxe_icrc_generate(skb, pkt);
@@ -596,7 +599,7 @@ static int rxe_notify(struct notifier_block *not_blk,
rxe_port_down(rxe);
break;
case NETDEV_CHANGEMTU:
- rxe_dbg(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
+ rxe_dbg_dev(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_set_mtu(rxe, ndev->mtu);
break;
case NETDEV_CHANGE:
@@ -608,7 +611,7 @@ static int rxe_notify(struct notifier_block *not_blk,
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
default:
- rxe_dbg(rxe, "ignoring netdev event = %ld for %s\n",
+ rxe_dbg_dev(rxe, "ignoring netdev event = %ld for %s\n",
event, ndev->name);
break;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index ab72db68b58f..c5451a4488ca 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
int has_srq)
{
if (cap->max_send_wr > rxe->attr.max_qp_wr) {
- rxe_dbg(rxe, "invalid send wr = %u > %d\n",
+ rxe_dbg_dev(rxe, "invalid send wr = %u > %d\n",
cap->max_send_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_send_sge > rxe->attr.max_send_sge) {
- rxe_dbg(rxe, "invalid send sge = %u > %d\n",
+ rxe_dbg_dev(rxe, "invalid send sge = %u > %d\n",
cap->max_send_sge, rxe->attr.max_send_sge);
goto err1;
}
if (!has_srq) {
if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
- rxe_dbg(rxe, "invalid recv wr = %u > %d\n",
+ rxe_dbg_dev(rxe, "invalid recv wr = %u > %d\n",
cap->max_recv_wr, rxe->attr.max_qp_wr);
goto err1;
}
if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
- rxe_dbg(rxe, "invalid recv sge = %u > %d\n",
+ rxe_dbg_dev(rxe, "invalid recv sge = %u > %d\n",
cap->max_recv_sge, rxe->attr.max_recv_sge);
goto err1;
}
}
if (cap->max_inline_data > rxe->max_inline_data) {
- rxe_dbg(rxe, "invalid max inline data = %u > %d\n",
+ rxe_dbg_dev(rxe, "invalid max inline data = %u > %d\n",
cap->max_inline_data, rxe->max_inline_data);
goto err1;
}
@@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
}
if (!init->recv_cq || !init->send_cq) {
- rxe_dbg(rxe, "missing cq\n");
+ rxe_dbg_dev(rxe, "missing cq\n");
goto err1;
}
@@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
if (init->qp_type == IB_QPT_GSI) {
if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
- rxe_dbg(rxe, "invalid port = %d\n", port_num);
+ rxe_dbg_dev(rxe, "invalid port = %d\n", port_num);
goto err1;
}
port = &rxe->port;
if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
- rxe_dbg(rxe, "GSI QP exists for port %d\n", port_num);
+ rxe_dbg_dev(rxe, "GSI QP exists for port %d\n", port_num);
goto err1;
}
}
@@ -231,8 +231,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->req.wqe_index = queue_get_producer(qp->sq.queue,
QUEUE_TYPE_FROM_CLIENT);
- qp->req.state = QP_STATE_RESET;
- qp->comp.state = QP_STATE_RESET;
qp->req.opcode = -1;
qp->comp.opcode = -1;
@@ -287,7 +285,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
qp->resp.opcode = OPCODE_NONE;
qp->resp.msn = 0;
- qp->resp.state = QP_STATE_RESET;
return 0;
}
@@ -328,8 +325,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
if (err)
goto err2;
+ spin_lock_bh(&qp->state_lock);
qp->attr.qp_state = IB_QPS_RESET;
qp->valid = 1;
+ spin_unlock_bh(&qp->state_lock);
return 0;
@@ -380,30 +379,9 @@ int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init)
return 0;
}
-/* called by the modify qp verb, this routine checks all the parameters before
- * making any changes
- */
int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_attr *attr, int mask)
{
- enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
- attr->cur_qp_state : qp->attr.qp_state;
- enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
- attr->qp_state : cur_state;
-
- if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
- rxe_dbg_qp(qp, "invalid mask or state\n");
- goto err1;
- }
-
- if (mask & IB_QP_STATE) {
- if (cur_state == IB_QPS_SQD) {
- if (qp->req.state == QP_STATE_DRAIN &&
- new_state != IB_QPS_ERR)
- goto err1;
- }
- }
-
if (mask & IB_QP_PORT) {
if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num);
@@ -473,29 +451,18 @@ static void rxe_qp_reset(struct rxe_qp *qp)
{
/* stop tasks from running */
rxe_disable_task(&qp->resp.task);
+ rxe_disable_task(&qp->comp.task);
+ rxe_disable_task(&qp->req.task);
- /* stop request/comp */
- if (qp->sq.queue) {
- if (qp_type(qp) == IB_QPT_RC)
- rxe_disable_task(&qp->comp.task);
- rxe_disable_task(&qp->req.task);
- }
-
- /* move qp to the reset state */
- qp->req.state = QP_STATE_RESET;
- qp->comp.state = QP_STATE_RESET;
- qp->resp.state = QP_STATE_RESET;
+ /* drain work and packet queuesc */
+ rxe_requester(qp);
+ rxe_completer(qp);
+ rxe_responder(qp);
- /* let state machines reset themselves drain work and packet queues
- * etc.
- */
- __rxe_do_task(&qp->resp.task);
-
- if (qp->sq.queue) {
- __rxe_do_task(&qp->comp.task);
- __rxe_do_task(&qp->req.task);
+ if (qp->rq.queue)
+ rxe_queue_reset(qp->rq.queue);
+ if (qp->sq.queue)
rxe_queue_reset(qp->sq.queue);
- }
/* cleanup attributes */
atomic_set(&qp->ssn, 0);
@@ -518,54 +485,103 @@ static void rxe_qp_reset(struct rxe_qp *qp)
/* reenable tasks */
rxe_enable_task(&qp->resp.task);
-
- if (qp->sq.queue) {
- if (qp_type(qp) == IB_QPT_RC)
- rxe_enable_task(&qp->comp.task);
-
- rxe_enable_task(&qp->req.task);
- }
-}
-
-/* drain the send queue */
-static void rxe_qp_drain(struct rxe_qp *qp)
-{
- if (qp->sq.queue) {
- if (qp->req.state != QP_STATE_DRAINED) {
- qp->req.state = QP_STATE_DRAIN;
- if (qp_type(qp) == IB_QPT_RC)
- rxe_sched_task(&qp->comp.task);
- else
- __rxe_do_task(&qp->comp.task);
- rxe_sched_task(&qp->req.task);
- }
- }
+ rxe_enable_task(&qp->comp.task);
+ rxe_enable_task(&qp->req.task);
}
/* move the qp to the error state */
void rxe_qp_error(struct rxe_qp *qp)
{
- qp->req.state = QP_STATE_ERROR;
- qp->resp.state = QP_STATE_ERROR;
- qp->comp.state = QP_STATE_ERROR;
+ spin_lock_bh(&qp->state_lock);
qp->attr.qp_state = IB_QPS_ERR;
/* drain work and packet queues */
rxe_sched_task(&qp->resp.task);
+ rxe_sched_task(&qp->comp.task);
+ rxe_sched_task(&qp->req.task);
+ spin_unlock_bh(&qp->state_lock);
+}
- if (qp_type(qp) == IB_QPT_RC)
- rxe_sched_task(&qp->comp.task);
- else
- __rxe_do_task(&qp->comp.task);
+static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr,
+ int mask)
+{
+ spin_lock_bh(&qp->state_lock);
+ qp->attr.sq_draining = 1;
+ rxe_sched_task(&qp->comp.task);
rxe_sched_task(&qp->req.task);
+ spin_unlock_bh(&qp->state_lock);
}
+/* caller should hold qp->state_lock */
+static int __qp_chk_state(struct rxe_qp *qp, struct ib_qp_attr *attr,
+ int mask)
+{
+ enum ib_qp_state cur_state;
+ enum ib_qp_state new_state;
+
+ cur_state = (mask & IB_QP_CUR_STATE) ?
+ attr->cur_qp_state : qp->attr.qp_state;
+ new_state = (mask & IB_QP_STATE) ?
+ attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask))
+ return -EINVAL;
+
+ if (mask & IB_QP_STATE && cur_state == IB_QPS_SQD) {
+ if (qp->attr.sq_draining && new_state != IB_QPS_ERR)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const char *const qps2str[] = {
+ [IB_QPS_RESET] = "RESET",
+ [IB_QPS_INIT] = "INIT",
+ [IB_QPS_RTR] = "RTR",
+ [IB_QPS_RTS] = "RTS",
+ [IB_QPS_SQD] = "SQD",
+ [IB_QPS_SQE] = "SQE",
+ [IB_QPS_ERR] = "ERR",
+};
+
/* called by the modify qp verb */
int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
struct ib_udata *udata)
{
int err;
+ if (mask & IB_QP_CUR_STATE)
+ qp->attr.cur_qp_state = attr->qp_state;
+
+ if (mask & IB_QP_STATE) {
+ spin_lock_bh(&qp->state_lock);
+ err = __qp_chk_state(qp, attr, mask);
+ if (!err) {
+ qp->attr.qp_state = attr->qp_state;
+ rxe_dbg_qp(qp, "state -> %s\n",
+ qps2str[attr->qp_state]);
+ }
+ spin_unlock_bh(&qp->state_lock);
+
+ if (err)
+ return err;
+
+ switch (attr->qp_state) {
+ case IB_QPS_RESET:
+ rxe_qp_reset(qp);
+ break;
+ case IB_QPS_SQD:
+ rxe_qp_sqd(qp, attr, mask);
+ break;
+ case IB_QPS_ERR:
+ rxe_qp_error(qp);
+ break;
+ default:
+ break;
+ }
+ }
+
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
int max_rd_atomic = attr->max_rd_atomic ?
roundup_pow_of_two(attr->max_rd_atomic) : 0;
@@ -587,9 +603,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
return err;
}
- if (mask & IB_QP_CUR_STATE)
- qp->attr.cur_qp_state = attr->qp_state;
-
if (mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
qp->attr.en_sqd_async_notify = attr->en_sqd_async_notify;
@@ -669,50 +682,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_DEST_QPN)
qp->attr.dest_qp_num = attr->dest_qp_num;
- if (mask & IB_QP_STATE) {
- qp->attr.qp_state = attr->qp_state;
-
- switch (attr->qp_state) {
- case IB_QPS_RESET:
- rxe_dbg_qp(qp, "state -> RESET\n");
- rxe_qp_reset(qp);
- break;
-
- case IB_QPS_INIT:
- rxe_dbg_qp(qp, "state -> INIT\n");
- qp->req.state = QP_STATE_INIT;
- qp->resp.state = QP_STATE_INIT;
- qp->comp.state = QP_STATE_INIT;
- break;
-
- case IB_QPS_RTR:
- rxe_dbg_qp(qp, "state -> RTR\n");
- qp->resp.state = QP_STATE_READY;
- break;
-
- case IB_QPS_RTS:
- rxe_dbg_qp(qp, "state -> RTS\n");
- qp->req.state = QP_STATE_READY;
- qp->comp.state = QP_STATE_READY;
- break;
-
- case IB_QPS_SQD:
- rxe_dbg_qp(qp, "state -> SQD\n");
- rxe_qp_drain(qp);
- break;
-
- case IB_QPS_SQE:
- rxe_dbg_qp(qp, "state -> SQE !!?\n");
- /* Not possible from modify_qp. */
- break;
-
- case IB_QPS_ERR:
- rxe_dbg_qp(qp, "state -> ERR\n");
- rxe_qp_error(qp);
- break;
- }
- }
-
return 0;
}
@@ -736,18 +705,15 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
rxe_av_to_attr(&qp->pri_av, &attr->ah_attr);
rxe_av_to_attr(&qp->alt_av, &attr->alt_ah_attr);
- if (qp->req.state == QP_STATE_DRAIN) {
- attr->sq_draining = 1;
- /* applications that get this state
- * typically spin on it. yield the
- * processor
- */
+ /* Applications that get this state typically spin on it.
+ * Yield the processor
+ */
+ spin_lock_bh(&qp->state_lock);
+ if (qp->attr.sq_draining) {
+ spin_unlock_bh(&qp->state_lock);
cond_resched();
- } else {
- attr->sq_draining = 0;
}
-
- rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining);
+ spin_unlock_bh(&qp->state_lock);
return 0;
}
@@ -771,26 +737,29 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
+ spin_lock_bh(&qp->state_lock);
qp->valid = 0;
+ spin_unlock_bh(&qp->state_lock);
qp->qp_timeout_jiffies = 0;
- rxe_cleanup_task(&qp->resp.task);
if (qp_type(qp) == IB_QPT_RC) {
del_timer_sync(&qp->retrans_timer);
del_timer_sync(&qp->rnr_nak_timer);
}
- rxe_cleanup_task(&qp->req.task);
- rxe_cleanup_task(&qp->comp.task);
+ if (qp->resp.task.func)
+ rxe_cleanup_task(&qp->resp.task);
- /* flush out any receive wr's or pending requests */
if (qp->req.task.func)
- __rxe_do_task(&qp->req.task);
+ rxe_cleanup_task(&qp->req.task);
- if (qp->sq.queue) {
- __rxe_do_task(&qp->comp.task);
- __rxe_do_task(&qp->req.task);
- }
+ if (qp->comp.task.func)
+ rxe_cleanup_task(&qp->comp.task);
+
+ /* flush out any receive wr's or pending requests */
+ rxe_requester(qp);
+ rxe_completer(qp);
+ rxe_responder(qp);
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);
diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c
index d6dbf5a0058d..9611ee191a46 100644
--- a/drivers/infiniband/sw/rxe/rxe_queue.c
+++ b/drivers/infiniband/sw/rxe/rxe_queue.c
@@ -61,11 +61,11 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem,
/* num_elem == 0 is allowed, but uninteresting */
if (*num_elem < 0)
- goto err1;
+ return NULL;
q = kzalloc(sizeof(*q), GFP_KERNEL);
if (!q)
- goto err1;
+ return NULL;
q->rxe = rxe;
q->type = type;
@@ -100,7 +100,6 @@ struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem,
err2:
kfree(q);
-err1:
return NULL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 434a693cd4a5..2f953cc74256 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -38,12 +38,19 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EINVAL;
}
+ spin_lock_bh(&qp->state_lock);
if (pkt->mask & RXE_REQ_MASK) {
- if (unlikely(qp->resp.state != QP_STATE_READY))
+ if (unlikely(qp_state(qp) < IB_QPS_RTR)) {
+ spin_unlock_bh(&qp->state_lock);
return -EINVAL;
- } else if (unlikely(qp->req.state < QP_STATE_READY ||
- qp->req.state > QP_STATE_DRAINED))
- return -EINVAL;
+ }
+ } else {
+ if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
+ spin_unlock_bh(&qp->state_lock);
+ return -EINVAL;
+ }
+ }
+ spin_unlock_bh(&qp->state_lock);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 899c8779f800..65134a9aefe7 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -102,44 +102,44 @@ void rnr_nak_timer(struct timer_list *t)
rxe_dbg_qp(qp, "nak timer fired\n");
- /* request a send queue retry */
- qp->req.need_retry = 1;
- qp->req.wait_for_rnr_timer = 0;
- rxe_sched_task(&qp->req.task);
+ spin_lock_bh(&qp->state_lock);
+ if (qp->valid) {
+ /* request a send queue retry */
+ qp->req.need_retry = 1;
+ qp->req.wait_for_rnr_timer = 0;
+ rxe_sched_task(&qp->req.task);
+ }
+ spin_unlock_bh(&qp->state_lock);
}
-static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
+static void req_check_sq_drain_done(struct rxe_qp *qp)
{
- struct rxe_send_wqe *wqe;
- struct rxe_queue *q = qp->sq.queue;
- unsigned int index = qp->req.wqe_index;
+ struct rxe_queue *q;
+ unsigned int index;
unsigned int cons;
- unsigned int prod;
+ struct rxe_send_wqe *wqe;
- wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
- cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
- prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
+ spin_lock_bh(&qp->state_lock);
+ if (qp_state(qp) == IB_QPS_SQD) {
+ q = qp->sq.queue;
+ index = qp->req.wqe_index;
+ cons = queue_get_consumer(q, QUEUE_TYPE_FROM_CLIENT);
+ wqe = queue_addr_from_index(q, cons);
- if (unlikely(qp->req.state == QP_STATE_DRAIN)) {
/* check to see if we are drained;
* state_lock used by requester and completer
*/
- spin_lock_bh(&qp->state_lock);
do {
- if (qp->req.state != QP_STATE_DRAIN) {
+ if (!qp->attr.sq_draining)
/* comp just finished */
- spin_unlock_bh(&qp->state_lock);
break;
- }
if (wqe && ((index != cons) ||
- (wqe->state != wqe_state_posted))) {
+ (wqe->state != wqe_state_posted)))
/* comp not done yet */
- spin_unlock_bh(&qp->state_lock);
break;
- }
- qp->req.state = QP_STATE_DRAINED;
+ qp->attr.sq_draining = 0;
spin_unlock_bh(&qp->state_lock);
if (qp->ibqp.event_handler) {
@@ -151,18 +151,42 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
qp->ibqp.event_handler(&ev,
qp->ibqp.qp_context);
}
+ return;
} while (0);
}
+ spin_unlock_bh(&qp->state_lock);
+}
+
+static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp)
+{
+ struct rxe_queue *q = qp->sq.queue;
+ unsigned int index = qp->req.wqe_index;
+ unsigned int prod;
+ prod = queue_get_producer(q, QUEUE_TYPE_FROM_CLIENT);
if (index == prod)
return NULL;
+ else
+ return queue_addr_from_index(q, index);
+}
+
+static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
+{
+ struct rxe_send_wqe *wqe;
+
+ req_check_sq_drain_done(qp);
- wqe = queue_addr_from_index(q, index);
+ wqe = __req_next_wqe(qp);
+ if (wqe == NULL)
+ return NULL;
- if (unlikely((qp->req.state == QP_STATE_DRAIN ||
- qp->req.state == QP_STATE_DRAINED) &&
- (wqe->state != wqe_state_processing)))
+ spin_lock_bh(&qp->state_lock);
+ if (unlikely((qp_state(qp) == IB_QPS_SQD) &&
+ (wqe->state != wqe_state_processing))) {
+ spin_unlock_bh(&qp->state_lock);
return NULL;
+ }
+ spin_unlock_bh(&qp->state_lock);
wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp);
return wqe;
@@ -635,9 +659,8 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
return 0;
}
-int rxe_requester(void *arg)
+int rxe_requester(struct rxe_qp *qp)
{
- struct rxe_qp *qp = (struct rxe_qp *)arg;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_pkt_info pkt;
struct sk_buff *skb;
@@ -654,24 +677,22 @@ int rxe_requester(void *arg)
struct rxe_ah *ah;
struct rxe_av *av;
- if (!rxe_get(qp))
- return -EAGAIN;
-
- if (unlikely(!qp->valid))
+ spin_lock_bh(&qp->state_lock);
+ if (unlikely(!qp->valid)) {
+ spin_unlock_bh(&qp->state_lock);
goto exit;
+ }
- if (unlikely(qp->req.state == QP_STATE_ERROR)) {
- wqe = req_next_wqe(qp);
+ if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
+ wqe = __req_next_wqe(qp);
+ spin_unlock_bh(&qp->state_lock);
if (wqe)
- /*
- * Generate an error completion for error qp state
- */
goto err;
else
goto exit;
}
- if (unlikely(qp->req.state == QP_STATE_RESET)) {
+ if (unlikely(qp_state(qp) == IB_QPS_RESET)) {
qp->req.wqe_index = queue_get_consumer(q,
QUEUE_TYPE_FROM_CLIENT);
qp->req.opcode = -1;
@@ -679,8 +700,10 @@ int rxe_requester(void *arg)
qp->req.wait_psn = 0;
qp->req.need_retry = 0;
qp->req.wait_for_rnr_timer = 0;
+ spin_unlock_bh(&qp->state_lock);
goto exit;
}
+ spin_unlock_bh(&qp->state_lock);
/* we come here if the retransmit timer has fired
* or if the rnr timer has fired. If the retransmit
@@ -757,7 +780,7 @@ int rxe_requester(void *arg)
qp->req.wqe_index);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
- rxe_run_task(&qp->comp.task);
+ rxe_sched_task(&qp->comp.task);
goto done;
}
payload = mtu;
@@ -840,12 +863,9 @@ err:
/* update wqe_index for each wqe completion */
qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
wqe->state = wqe_state_error;
- qp->req.state = QP_STATE_ERROR;
- rxe_run_task(&qp->comp.task);
+ rxe_qp_error(qp);
exit:
ret = -EAGAIN;
out:
- rxe_put(qp);
-
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 0cc1ba91d48c..68f6cd188d8e 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -42,7 +42,6 @@ static char *resp_state_name[] = {
[RESPST_ERR_LENGTH] = "ERR_LENGTH",
[RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
[RESPST_ERROR] = "ERROR",
- [RESPST_RESET] = "RESET",
[RESPST_DONE] = "DONE",
[RESPST_EXIT] = "EXIT",
};
@@ -69,17 +68,6 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
{
struct sk_buff *skb;
- if (qp->resp.state == QP_STATE_ERROR) {
- while ((skb = skb_dequeue(&qp->req_pkts))) {
- rxe_put(qp);
- kfree_skb(skb);
- ib_device_put(qp->ibqp.device);
- }
-
- /* go drain recv wr queue */
- return RESPST_CHK_RESOURCE;
- }
-
skb = skb_peek(&qp->req_pkts);
if (!skb)
return RESPST_EXIT;
@@ -334,24 +322,6 @@ static enum resp_states check_resource(struct rxe_qp *qp,
{
struct rxe_srq *srq = qp->srq;
- if (qp->resp.state == QP_STATE_ERROR) {
- if (qp->resp.wqe) {
- qp->resp.status = IB_WC_WR_FLUSH_ERR;
- return RESPST_COMPLETE;
- } else if (!srq) {
- qp->resp.wqe = queue_head(qp->rq.queue,
- QUEUE_TYPE_FROM_CLIENT);
- if (qp->resp.wqe) {
- qp->resp.status = IB_WC_WR_FLUSH_ERR;
- return RESPST_COMPLETE;
- } else {
- return RESPST_EXIT;
- }
- } else {
- return RESPST_EXIT;
- }
- }
-
if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
/* it is the requesters job to not send
* too many read/atomic ops, we just
@@ -1151,6 +1121,10 @@ static enum resp_states do_complete(struct rxe_qp *qp,
wc->port_num = qp->attr.port_num;
}
+ } else {
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ rxe_err_qp(qp, "non-flush error status = %d",
+ wc->status);
}
/* have copy for srq and reference for !srq */
@@ -1163,8 +1137,13 @@ static enum resp_states do_complete(struct rxe_qp *qp,
return RESPST_ERR_CQ_OVERFLOW;
finish:
- if (unlikely(qp->resp.state == QP_STATE_ERROR))
+ spin_lock_bh(&qp->state_lock);
+ if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
+ spin_unlock_bh(&qp->state_lock);
return RESPST_CHK_RESOURCE;
+ }
+ spin_unlock_bh(&qp->state_lock);
+
if (unlikely(!pkt))
return RESPST_DONE;
if (qp_type(qp) == IB_QPT_RC)
@@ -1421,49 +1400,90 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
}
}
-static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
+/* drain incoming request packet queue */
+static void drain_req_pkts(struct rxe_qp *qp)
{
struct sk_buff *skb;
- struct rxe_queue *q = qp->rq.queue;
while ((skb = skb_dequeue(&qp->req_pkts))) {
rxe_put(qp);
kfree_skb(skb);
ib_device_put(qp->ibqp.device);
}
+}
+
+/* complete receive wqe with flush error */
+static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe)
+{
+ struct rxe_cqe cqe = {};
+ struct ib_wc *wc = &cqe.ibwc;
+ struct ib_uverbs_wc *uwc = &cqe.uibwc;
+ int err;
+
+ if (qp->rcq->is_user) {
+ uwc->wr_id = wqe->wr_id;
+ uwc->status = IB_WC_WR_FLUSH_ERR;
+ uwc->qp_num = qp_num(qp);
+ } else {
+ wc->wr_id = wqe->wr_id;
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->qp = &qp->ibqp;
+ }
- if (notify)
+ err = rxe_cq_post(qp->rcq, &cqe, 0);
+ if (err)
+ rxe_dbg_cq(qp->rcq, "post cq failed err = %d", err);
+
+ return err;
+}
+
+/* drain and optionally complete the recive queue
+ * if unable to complete a wqe stop completing and
+ * just flush the remaining wqes
+ */
+static void flush_recv_queue(struct rxe_qp *qp, bool notify)
+{
+ struct rxe_queue *q = qp->rq.queue;
+ struct rxe_recv_wqe *wqe;
+ int err;
+
+ if (qp->srq)
return;
- while (!qp->srq && q && queue_head(q, q->type))
+ while ((wqe = queue_head(q, q->type))) {
+ if (notify) {
+ err = flush_recv_wqe(qp, wqe);
+ if (err)
+ notify = 0;
+ }
queue_advance_consumer(q, q->type);
+ }
+
+ qp->resp.wqe = NULL;
}
-int rxe_responder(void *arg)
+int rxe_responder(struct rxe_qp *qp)
{
- struct rxe_qp *qp = (struct rxe_qp *)arg;
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
enum resp_states state;
struct rxe_pkt_info *pkt = NULL;
int ret;
- if (!rxe_get(qp))
- return -EAGAIN;
+ spin_lock_bh(&qp->state_lock);
+ if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
+ qp_state(qp) == IB_QPS_RESET) {
+ bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);
- qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
-
- if (!qp->valid)
+ drain_req_pkts(qp);
+ flush_recv_queue(qp, notify);
+ spin_unlock_bh(&qp->state_lock);
goto exit;
+ }
+ spin_unlock_bh(&qp->state_lock);
- switch (qp->resp.state) {
- case QP_STATE_RESET:
- state = RESPST_RESET;
- break;
+ qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
- default:
- state = RESPST_GET_REQ;
- break;
- }
+ state = RESPST_GET_REQ;
while (1) {
rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
@@ -1622,11 +1642,6 @@ int rxe_responder(void *arg)
goto exit;
- case RESPST_RESET:
- rxe_drain_req_pkts(qp, false);
- qp->resp.wqe = NULL;
- goto exit;
-
case RESPST_ERROR:
qp->resp.goto_error = 0;
rxe_dbg_qp(qp, "moved to error state\n");
@@ -1648,6 +1663,5 @@ done:
exit:
ret = -EAGAIN;
out:
- rxe_put(qp);
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 82e37a41ced4..27ca82ec0826 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -13,13 +13,13 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
struct ib_srq_attr *attr = &init->attr;
if (attr->max_wr > rxe->attr.max_srq_wr) {
- rxe_dbg(rxe, "max_wr(%d) > max_srq_wr(%d)\n",
+ rxe_dbg_dev(rxe, "max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
- rxe_dbg(rxe, "max_wr(%d) <= 0\n", attr->max_wr);
+ rxe_dbg_dev(rxe, "max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
@@ -27,7 +27,7 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
attr->max_wr = RXE_MIN_SRQ_WR;
if (attr->max_sge > rxe->attr.max_srq_sge) {
- rxe_dbg(rxe, "max_sge(%d) > max_srq_sge(%d)\n",
+ rxe_dbg_dev(rxe, "max_sge(%d) > max_srq_sge(%d)\n",
attr->max_sge, rxe->attr.max_srq_sge);
goto err1;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 60b90e33a884..fb9a6bc8e620 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -6,69 +6,128 @@
#include "rxe.h"
-int __rxe_do_task(struct rxe_task *task)
+/* Check if task is idle i.e. not running, not scheduled in
+ * tasklet queue and not draining. If so move to busy to
+ * reserve a slot in do_task() by setting to busy and taking
+ * a qp reference to cover the gap from now until the task finishes.
+ * state will move out of busy if task returns a non zero value
+ * in do_task(). If state is already busy it is raised to armed
+ * to indicate to do_task that additional pass should be made
+ * over the task.
+ * Context: caller should hold task->lock.
+ * Returns: true if state transitioned from idle to busy else false.
+ */
+static bool __reserve_if_idle(struct rxe_task *task)
+{
+ WARN_ON(rxe_read(task->qp) <= 0);
+
+ if (task->tasklet.state & BIT(TASKLET_STATE_SCHED))
+ return false;
+
+ if (task->state == TASK_STATE_IDLE) {
+ rxe_get(task->qp);
+ task->state = TASK_STATE_BUSY;
+ task->num_sched++;
+ return true;
+ }
+
+ if (task->state == TASK_STATE_BUSY)
+ task->state = TASK_STATE_ARMED;
+ return false;
+}
+
+/* check if task is idle or drained and not currently
+ * scheduled in the tasklet queue. This routine is
+ * called by rxe_cleanup_task or rxe_disable_task to
+ * see if the queue is empty.
+ * Context: caller should hold task->lock.
+ * Returns true if done else false.
+ */
+static bool __is_done(struct rxe_task *task)
{
- int ret;
+ if (task->tasklet.state & BIT(TASKLET_STATE_SCHED))
+ return false;
- while ((ret = task->func(task->arg)) == 0)
- ;
+ if (task->state == TASK_STATE_IDLE ||
+ task->state == TASK_STATE_DRAINED) {
+ return true;
+ }
- task->ret = ret;
+ return false;
+}
- return ret;
+/* a locked version of __is_done */
+static bool is_done(struct rxe_task *task)
+{
+ unsigned long flags;
+ int done;
+
+ spin_lock_irqsave(&task->lock, flags);
+ done = __is_done(task);
+ spin_unlock_irqrestore(&task->lock, flags);
+
+ return done;
}
-/*
- * this locking is due to a potential race where
- * a second caller finds the task already running
- * but looks just after the last call to func
+/* do_task is a wrapper for the three tasks (requester,
+ * completer, responder) and calls them in a loop until
+ * they return a non-zero value. It is called either
+ * directly by rxe_run_task or indirectly if rxe_sched_task
+ * schedules the task. They must call __reserve_if_idle to
+ * move the task to busy before calling or scheduling.
+ * The task can also be moved to drained or invalid
+ * by calls to rxe-cleanup_task or rxe_disable_task.
+ * In that case tasks which get here are not executed but
+ * just flushed. The tasks are designed to look to see if
+ * there is work to do and do part of it before returning
+ * here with a return value of zero until all the work
+ * has been consumed then it retuens a non-zero value.
+ * The number of times the task can be run is limited by
+ * max iterations so one task cannot hold the cpu forever.
*/
static void do_task(struct tasklet_struct *t)
{
int cont;
int ret;
struct rxe_task *task = from_tasklet(task, t, tasklet);
- struct rxe_qp *qp = (struct rxe_qp *)task->arg;
- unsigned int iterations = RXE_MAX_ITERATIONS;
+ unsigned int iterations;
+ unsigned long flags;
+ int resched = 0;
- spin_lock_bh(&task->lock);
- switch (task->state) {
- case TASK_STATE_START:
- task->state = TASK_STATE_BUSY;
- spin_unlock_bh(&task->lock);
- break;
-
- case TASK_STATE_BUSY:
- task->state = TASK_STATE_ARMED;
- fallthrough;
- case TASK_STATE_ARMED:
- spin_unlock_bh(&task->lock);
- return;
+ WARN_ON(rxe_read(task->qp) <= 0);
- default:
- spin_unlock_bh(&task->lock);
- rxe_dbg_qp(qp, "failed with bad state %d\n", task->state);
+ spin_lock_irqsave(&task->lock, flags);
+ if (task->state >= TASK_STATE_DRAINED) {
+ rxe_put(task->qp);
+ task->num_done++;
+ spin_unlock_irqrestore(&task->lock, flags);
return;
}
+ spin_unlock_irqrestore(&task->lock, flags);
do {
+ iterations = RXE_MAX_ITERATIONS;
cont = 0;
- ret = task->func(task->arg);
- spin_lock_bh(&task->lock);
+ do {
+ ret = task->func(task->qp);
+ } while (ret == 0 && iterations-- > 0);
+
+ spin_lock_irqsave(&task->lock, flags);
switch (task->state) {
case TASK_STATE_BUSY:
if (ret) {
- task->state = TASK_STATE_START;
- } else if (iterations--) {
- cont = 1;
+ task->state = TASK_STATE_IDLE;
} else {
- /* reschedule the tasklet and exit
+ /* This can happen if the client
+ * can add work faster than the
+ * tasklet can finish it.
+ * Reschedule the tasklet and exit
* the loop to give up the cpu
*/
- tasklet_schedule(&task->tasklet);
- task->state = TASK_STATE_START;
+ task->state = TASK_STATE_IDLE;
+ resched = 1;
}
break;
@@ -81,71 +140,158 @@ static void do_task(struct tasklet_struct *t)
cont = 1;
break;
+ case TASK_STATE_DRAINING:
+ if (ret)
+ task->state = TASK_STATE_DRAINED;
+ else
+ cont = 1;
+ break;
+
default:
- rxe_dbg_qp(qp, "failed with bad state %d\n",
- task->state);
+ WARN_ON(1);
+ rxe_info_qp(task->qp, "unexpected task state = %d", task->state);
+ }
+
+ if (!cont) {
+ task->num_done++;
+ if (WARN_ON(task->num_done != task->num_sched))
+ rxe_err_qp(task->qp, "%ld tasks scheduled, %ld tasks done",
+ task->num_sched, task->num_done);
}
- spin_unlock_bh(&task->lock);
+ spin_unlock_irqrestore(&task->lock, flags);
} while (cont);
task->ret = ret;
+
+ if (resched)
+ rxe_sched_task(task);
+
+ rxe_put(task->qp);
}
-int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *))
+int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
+ int (*func)(struct rxe_qp *))
{
- task->arg = arg;
- task->func = func;
- task->destroyed = false;
+ WARN_ON(rxe_read(qp) <= 0);
+
+ task->qp = qp;
+ task->func = func;
tasklet_setup(&task->tasklet, do_task);
- task->state = TASK_STATE_START;
+ task->state = TASK_STATE_IDLE;
spin_lock_init(&task->lock);
return 0;
}
+/* rxe_cleanup_task is only called from rxe_do_qp_cleanup in
+ * process context. The qp is already completed with no
+ * remaining references. Once the queue is drained the
+ * task is moved to invalid and returns. The qp cleanup
+ * code then calls the task functions directly without
+ * using the task struct to drain any late arriving packets
+ * or work requests.
+ */
void rxe_cleanup_task(struct rxe_task *task)
{
- bool idle;
+ unsigned long flags;
- /*
- * Mark the task, then wait for it to finish. It might be
- * running in a non-tasklet (direct call) context.
- */
- task->destroyed = true;
+ spin_lock_irqsave(&task->lock, flags);
+ if (!__is_done(task) && task->state < TASK_STATE_DRAINED) {
+ task->state = TASK_STATE_DRAINING;
+ } else {
+ task->state = TASK_STATE_INVALID;
+ spin_unlock_irqrestore(&task->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&task->lock, flags);
- do {
- spin_lock_bh(&task->lock);
- idle = (task->state == TASK_STATE_START);
- spin_unlock_bh(&task->lock);
- } while (!idle);
+ /* now the task cannot be scheduled or run just wait
+ * for the previously scheduled tasks to finish.
+ */
+ while (!is_done(task))
+ cond_resched();
tasklet_kill(&task->tasklet);
+
+ spin_lock_irqsave(&task->lock, flags);
+ task->state = TASK_STATE_INVALID;
+ spin_unlock_irqrestore(&task->lock, flags);
}
+/* run the task inline if it is currently idle
+ * cannot call do_task holding the lock
+ */
void rxe_run_task(struct rxe_task *task)
{
- if (task->destroyed)
- return;
+ unsigned long flags;
+ int run;
+
+ WARN_ON(rxe_read(task->qp) <= 0);
+
+ spin_lock_irqsave(&task->lock, flags);
+ run = __reserve_if_idle(task);
+ spin_unlock_irqrestore(&task->lock, flags);
- do_task(&task->tasklet);
+ if (run)
+ do_task(&task->tasklet);
}
+/* schedule the task to run later as a tasklet.
+ * the tasklet)schedule call can be called holding
+ * the lock.
+ */
void rxe_sched_task(struct rxe_task *task)
{
- if (task->destroyed)
- return;
+ unsigned long flags;
- tasklet_schedule(&task->tasklet);
+ WARN_ON(rxe_read(task->qp) <= 0);
+
+ spin_lock_irqsave(&task->lock, flags);
+ if (__reserve_if_idle(task))
+ tasklet_schedule(&task->tasklet);
+ spin_unlock_irqrestore(&task->lock, flags);
}
+/* rxe_disable/enable_task are only called from
+ * rxe_modify_qp in process context. Task is moved
+ * to the drained state by do_task.
+ */
void rxe_disable_task(struct rxe_task *task)
{
+ unsigned long flags;
+
+ WARN_ON(rxe_read(task->qp) <= 0);
+
+ spin_lock_irqsave(&task->lock, flags);
+ if (!__is_done(task) && task->state < TASK_STATE_DRAINED) {
+ task->state = TASK_STATE_DRAINING;
+ } else {
+ task->state = TASK_STATE_DRAINED;
+ spin_unlock_irqrestore(&task->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&task->lock, flags);
+
+ while (!is_done(task))
+ cond_resched();
+
tasklet_disable(&task->tasklet);
}
void rxe_enable_task(struct rxe_task *task)
{
+ unsigned long flags;
+
+ WARN_ON(rxe_read(task->qp) <= 0);
+
+ spin_lock_irqsave(&task->lock, flags);
+ if (task->state == TASK_STATE_INVALID) {
+ spin_unlock_irqrestore(&task->lock, flags);
+ return;
+ }
+ task->state = TASK_STATE_IDLE;
tasklet_enable(&task->tasklet);
+ spin_unlock_irqrestore(&task->lock, flags);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index 7b88129702ac..facb7c8e3729 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -8,9 +8,12 @@
#define RXE_TASK_H
enum {
- TASK_STATE_START = 0,
+ TASK_STATE_IDLE = 0,
TASK_STATE_BUSY = 1,
TASK_STATE_ARMED = 2,
+ TASK_STATE_DRAINING = 3,
+ TASK_STATE_DRAINED = 4,
+ TASK_STATE_INVALID = 5,
};
/*
@@ -22,28 +25,24 @@ struct rxe_task {
struct tasklet_struct tasklet;
int state;
spinlock_t lock;
- void *arg;
- int (*func)(void *arg);
+ struct rxe_qp *qp;
+ int (*func)(struct rxe_qp *qp);
int ret;
- bool destroyed;
+ long num_sched;
+ long num_done;
};
/*
* init rxe_task structure
- * arg => parameter to pass to fcn
+ * qp => parameter to pass to func
* func => function to call until it returns != 0
*/
-int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *));
+int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
+ int (*func)(struct rxe_qp *));
/* cleanup task */
void rxe_cleanup_task(struct rxe_task *task);
-/*
- * raw call to func in loop without any checking
- * can call when tasklets are disabled
- */
-int __rxe_do_task(struct rxe_task *task);
-
void rxe_run_task(struct rxe_task *task);
void rxe_sched_task(struct rxe_task *task);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index e14050a69276..dea605b7f683 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -12,31 +12,48 @@
#include "rxe_queue.h"
#include "rxe_hw_counters.h"
-static int rxe_query_device(struct ib_device *dev,
+static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr);
+
+/* dev */
+static int rxe_query_device(struct ib_device *ibdev,
struct ib_device_attr *attr,
- struct ib_udata *uhw)
+ struct ib_udata *udata)
{
- struct rxe_dev *rxe = to_rdev(dev);
+ struct rxe_dev *rxe = to_rdev(ibdev);
+ int err;
- if (uhw->inlen || uhw->outlen)
- return -EINVAL;
+ if (udata->inlen || udata->outlen) {
+ rxe_dbg_dev(rxe, "malformed udata");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ memcpy(attr, &rxe->attr, sizeof(*attr));
- *attr = rxe->attr;
return 0;
+
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
+ return err;
}
-static int rxe_query_port(struct ib_device *dev,
+static int rxe_query_port(struct ib_device *ibdev,
u32 port_num, struct ib_port_attr *attr)
{
- struct rxe_dev *rxe = to_rdev(dev);
- int rc;
+ struct rxe_dev *rxe = to_rdev(ibdev);
+ int err, ret;
+
+ if (port_num != 1) {
+ err = -EINVAL;
+ rxe_dbg_dev(rxe, "bad port_num = %d", port_num);
+ goto err_out;
+ }
- /* *attr being zeroed by the caller, avoid zeroing it here */
- *attr = rxe->port.attr;
+ memcpy(attr, &rxe->port.attr, sizeof(*attr));
mutex_lock(&rxe->usdev_lock);
- rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
- &attr->active_width);
+ ret = ib_get_eth_speed(ibdev, port_num, &attr->active_speed,
+ &attr->active_width);
if (attr->state == IB_PORT_ACTIVE)
attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
@@ -47,27 +64,45 @@ static int rxe_query_port(struct ib_device *dev,
mutex_unlock(&rxe->usdev_lock);
- return rc;
+ return ret;
+
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
+ return err;
}
-static int rxe_query_pkey(struct ib_device *device,
+static int rxe_query_pkey(struct ib_device *ibdev,
u32 port_num, u16 index, u16 *pkey)
{
- if (index > 0)
- return -EINVAL;
+ struct rxe_dev *rxe = to_rdev(ibdev);
+ int err;
+
+ if (index != 0) {
+ err = -EINVAL;
+ rxe_dbg_dev(rxe, "bad pkey index = %d", index);
+ goto err_out;
+ }
*pkey = IB_DEFAULT_PKEY_FULL;
return 0;
+
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
+ return err;
}
-static int rxe_modify_device(struct ib_device *dev,
+static int rxe_modify_device(struct ib_device *ibdev,
int mask, struct ib_device_modify *attr)
{
- struct rxe_dev *rxe = to_rdev(dev);
+ struct rxe_dev *rxe = to_rdev(ibdev);
+ int err;
if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
- IB_DEVICE_MODIFY_NODE_DESC))
- return -EOPNOTSUPP;
+ IB_DEVICE_MODIFY_NODE_DESC)) {
+ err = -EOPNOTSUPP;
+ rxe_dbg_dev(rxe, "unsupported mask = 0x%x", mask);
+ goto err_out;
+ }
if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);
@@ -78,16 +113,33 @@ static int rxe_modify_device(struct ib_device *dev,
}
return 0;
+
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
+ return err;
}
-static int rxe_modify_port(struct ib_device *dev,
- u32 port_num, int mask, struct ib_port_modify *attr)
+static int rxe_modify_port(struct ib_device *ibdev, u32 port_num,
+ int mask, struct ib_port_modify *attr)
{
- struct rxe_dev *rxe = to_rdev(dev);
+ struct rxe_dev *rxe = to_rdev(ibdev);
struct rxe_port *port;
+ int err;
- port = &rxe->port;
+ if (port_num != 1) {
+ err = -EINVAL;
+ rxe_dbg_dev(rxe, "bad port_num = %d", port_num);
+ goto err_out;
+ }
+ //TODO is shutdown useful
+ if (mask & ~(IB_PORT_RESET_QKEY_CNTR)) {
+ err = -EOPNOTSUPP;
+ rxe_dbg_dev(rxe, "unsupported mask = 0x%x", mask);
+ goto err_out;
+ }
+
+ port = &rxe->port;
port->attr.port_cap_flags |= attr->set_port_cap_mask;
port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;
@@ -95,73 +147,125 @@ static int rxe_modify_port(struct ib_device *dev,
port->attr.qkey_viol_cntr = 0;
return 0;
-}
-static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
- u32 port_num)
-{
- return IB_LINK_LAYER_ETHERNET;
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
+ return err;
}
-static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
+static enum rdma_link_layer rxe_get_link_layer(struct ib_device *ibdev,
+ u32 port_num)
{
- struct rxe_dev *rxe = to_rdev(ibuc->device);
- struct rxe_ucontext *uc = to_ruc(ibuc);
+ struct rxe_dev *rxe = to_rdev(ibdev);
+ int err;
- return rxe_add_to_pool(&rxe->uc_pool, uc);
-}
+ if (port_num != 1) {
+ err = -EINVAL;
+ rxe_dbg_dev(rxe, "bad port_num = %d", port_num);
+ goto err_out;
+ }
-static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
-{
- struct rxe_ucontext *uc = to_ruc(ibuc);
+ return IB_LINK_LAYER_ETHERNET;
- rxe_cleanup(uc);
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
+ return err;
}
-static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
+static int rxe_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
+ struct rxe_dev *rxe = to_rdev(ibdev);
+ struct ib_port_attr attr = {};
int err;
- struct ib_port_attr attr;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ if (port_num != 1) {
+ err = -EINVAL;
+ rxe_dbg_dev(rxe, "bad port_num = %d", port_num);
+ goto err_out;
+ }
- err = ib_query_port(dev, port_num, &attr);
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
- return err;
+ goto err_out;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
+
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
+ return err;
}
+/* uc */
+static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
+{
+ struct rxe_dev *rxe = to_rdev(ibuc->device);
+ struct rxe_ucontext *uc = to_ruc(ibuc);
+ int err;
+
+ err = rxe_add_to_pool(&rxe->uc_pool, uc);
+ if (err)
+ rxe_err_dev(rxe, "unable to create uc");
+
+ return err;
+}
+
+static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
+{
+ struct rxe_ucontext *uc = to_ruc(ibuc);
+ int err;
+
+ err = rxe_cleanup(uc);
+ if (err)
+ rxe_err_uc(uc, "cleanup failed, err = %d", err);
+}
+
+/* pd */
static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
+ int err;
- return rxe_add_to_pool(&rxe->pd_pool, pd);
+ err = rxe_add_to_pool(&rxe->pd_pool, pd);
+ if (err) {
+ rxe_dbg_dev(rxe, "unable to alloc pd");
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
+ return err;
}
static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct rxe_pd *pd = to_rpd(ibpd);
+ int err;
+
+ err = rxe_cleanup(pd);
+ if (err)
+ rxe_err_pd(pd, "cleanup failed, err = %d", err);
- rxe_cleanup(pd);
return 0;
}
+/* ah */
static int rxe_create_ah(struct ib_ah *ibah,
struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata)
-
{
struct rxe_dev *rxe = to_rdev(ibah->device);
struct rxe_ah *ah = to_rah(ibah);
struct rxe_create_ah_resp __user *uresp = NULL;
- int err;
+ int err, cleanup_err;
if (udata) {
/* test if new user provider */
@@ -174,16 +278,18 @@ static int rxe_create_ah(struct ib_ah *ibah,
err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_dev(rxe, "unable to create ah");
+ goto err_out;
+ }
/* create index > 0 */
ah->ah_num = ah->elem.index;
err = rxe_ah_chk_attr(ah, init_attr->ah_attr);
if (err) {
- rxe_cleanup(ah);
- return err;
+ rxe_dbg_ah(ah, "bad attr");
+ goto err_cleanup;
}
if (uresp) {
@@ -191,8 +297,9 @@ static int rxe_create_ah(struct ib_ah *ibah,
err = copy_to_user(&uresp->ah_num, &ah->ah_num,
sizeof(uresp->ah_num));
if (err) {
- rxe_cleanup(ah);
- return -EFAULT;
+ err = -EFAULT;
+ rxe_dbg_ah(ah, "unable to copy to user");
+ goto err_cleanup;
}
} else if (ah->is_user) {
/* only if old user provider */
@@ -203,19 +310,34 @@ static int rxe_create_ah(struct ib_ah *ibah,
rxe_finalize(ah);
return 0;
+
+err_cleanup:
+ cleanup_err = rxe_cleanup(ah);
+ if (cleanup_err)
+ rxe_err_ah(ah, "cleanup failed, err = %d", cleanup_err);
+err_out:
+ rxe_err_ah(ah, "returned err = %d", err);
+ return err;
}
static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
- int err;
struct rxe_ah *ah = to_rah(ibah);
+ int err;
err = rxe_ah_chk_attr(ah, attr);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_ah(ah, "bad attr");
+ goto err_out;
+ }
rxe_init_av(attr, &ah->av);
+
return 0;
+
+err_out:
+ rxe_err_ah(ah, "returned err = %d", err);
+ return err;
}
static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
@@ -225,92 +347,77 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
memset(attr, 0, sizeof(*attr));
attr->type = ibah->type;
rxe_av_to_attr(&ah->av, attr);
+
return 0;
}
static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);
+ int err;
- rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);
-
- return 0;
-}
-
-static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
-{
- int i;
- u32 length;
- struct rxe_recv_wqe *recv_wqe;
- int num_sge = ibwr->num_sge;
- int full;
-
- full = queue_full(rq->queue, QUEUE_TYPE_FROM_ULP);
- if (unlikely(full))
- return -ENOMEM;
-
- if (unlikely(num_sge > rq->max_sge))
- return -EINVAL;
-
- length = 0;
- for (i = 0; i < num_sge; i++)
- length += ibwr->sg_list[i].length;
-
- recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_FROM_ULP);
- recv_wqe->wr_id = ibwr->wr_id;
-
- memcpy(recv_wqe->dma.sge, ibwr->sg_list,
- num_sge * sizeof(struct ib_sge));
-
- recv_wqe->dma.length = length;
- recv_wqe->dma.resid = length;
- recv_wqe->dma.num_sge = num_sge;
- recv_wqe->dma.cur_sge = 0;
- recv_wqe->dma.sge_offset = 0;
-
- queue_advance_producer(rq->queue, QUEUE_TYPE_FROM_ULP);
+ err = rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);
+ if (err)
+ rxe_err_ah(ah, "cleanup failed, err = %d", err);
return 0;
}
+/* srq */
static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
struct ib_udata *udata)
{
- int err;
struct rxe_dev *rxe = to_rdev(ibsrq->device);
struct rxe_pd *pd = to_rpd(ibsrq->pd);
struct rxe_srq *srq = to_rsrq(ibsrq);
struct rxe_create_srq_resp __user *uresp = NULL;
+ int err, cleanup_err;
if (udata) {
- if (udata->outlen < sizeof(*uresp))
- return -EINVAL;
+ if (udata->outlen < sizeof(*uresp)) {
+ err = -EINVAL;
+ rxe_err_dev(rxe, "malformed udata");
+ goto err_out;
+ }
uresp = udata->outbuf;
}
- if (init->srq_type != IB_SRQT_BASIC)
- return -EOPNOTSUPP;
+ if (init->srq_type != IB_SRQT_BASIC) {
+ err = -EOPNOTSUPP;
+ rxe_dbg_dev(rxe, "srq type = %d, not supported",
+ init->srq_type);
+ goto err_out;
+ }
err = rxe_srq_chk_init(rxe, init);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_dev(rxe, "invalid init attributes");
+ goto err_out;
+ }
err = rxe_add_to_pool(&rxe->srq_pool, srq);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_dev(rxe, "unable to create srq, err = %d", err);
+ goto err_out;
+ }
rxe_get(pd);
srq->pd = pd;
err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
- if (err)
+ if (err) {
+ rxe_dbg_srq(srq, "create srq failed, err = %d", err);
goto err_cleanup;
+ }
return 0;
err_cleanup:
- rxe_cleanup(srq);
-
+ cleanup_err = rxe_cleanup(srq);
+ if (cleanup_err)
+ rxe_err_srq(srq, "cleanup failed, err = %d", cleanup_err);
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
return err;
}
@@ -318,46 +425,64 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask mask,
struct ib_udata *udata)
{
- int err;
struct rxe_srq *srq = to_rsrq(ibsrq);
struct rxe_dev *rxe = to_rdev(ibsrq->device);
- struct rxe_modify_srq_cmd ucmd = {};
+ struct rxe_modify_srq_cmd cmd = {};
+ int err;
if (udata) {
- if (udata->inlen < sizeof(ucmd))
- return -EINVAL;
+ if (udata->inlen < sizeof(cmd)) {
+ err = -EINVAL;
+ rxe_dbg_srq(srq, "malformed udata");
+ goto err_out;
+ }
- err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
- if (err)
- return err;
+ err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
+ if (err) {
+ err = -EFAULT;
+ rxe_dbg_srq(srq, "unable to read udata");
+ goto err_out;
+ }
}
err = rxe_srq_chk_attr(rxe, srq, attr, mask);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_srq(srq, "bad init attributes");
+ goto err_out;
+ }
+
+ err = rxe_srq_from_attr(rxe, srq, attr, mask, &cmd, udata);
+ if (err) {
+ rxe_dbg_srq(srq, "bad attr");
+ goto err_out;
+ }
+
+ return 0;
- return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
+err_out:
+ rxe_err_srq(srq, "returned err = %d", err);
+ return err;
}
static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
struct rxe_srq *srq = to_rsrq(ibsrq);
+ int err;
- if (srq->error)
- return -EINVAL;
+ if (srq->error) {
+ err = -EINVAL;
+ rxe_dbg_srq(srq, "srq in error state");
+ goto err_out;
+ }
attr->max_wr = srq->rq.queue->buf->index_mask;
attr->max_sge = srq->rq.max_sge;
attr->srq_limit = srq->limit;
return 0;
-}
-static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
-{
- struct rxe_srq *srq = to_rsrq(ibsrq);
-
- rxe_cleanup(srq);
- return 0;
+err_out:
+ rxe_err_srq(srq, "returned err = %d", err);
+ return err;
}
static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
@@ -378,76 +503,116 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
- if (err)
+ if (err) {
*bad_wr = wr;
+ rxe_err_srq(srq, "returned err = %d", err);
+ }
return err;
}
+static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+{
+ struct rxe_srq *srq = to_rsrq(ibsrq);
+ int err;
+
+ err = rxe_cleanup(srq);
+ if (err)
+ rxe_err_srq(srq, "cleanup failed, err = %d", err);
+
+ return 0;
+}
+
+/* qp */
static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
struct ib_udata *udata)
{
- int err;
struct rxe_dev *rxe = to_rdev(ibqp->device);
struct rxe_pd *pd = to_rpd(ibqp->pd);
struct rxe_qp *qp = to_rqp(ibqp);
struct rxe_create_qp_resp __user *uresp = NULL;
+ int err, cleanup_err;
if (udata) {
- if (udata->outlen < sizeof(*uresp))
- return -EINVAL;
- uresp = udata->outbuf;
- }
-
- if (init->create_flags)
- return -EOPNOTSUPP;
-
- err = rxe_qp_chk_init(rxe, init);
- if (err)
- return err;
+ if (udata->inlen) {
+ err = -EINVAL;
+ rxe_dbg_dev(rxe, "malformed udata, err = %d", err);
+ goto err_out;
+ }
- if (udata) {
- if (udata->inlen)
- return -EINVAL;
+ if (udata->outlen < sizeof(*uresp)) {
+ err = -EINVAL;
+ rxe_dbg_dev(rxe, "malformed udata, err = %d", err);
+ goto err_out;
+ }
qp->is_user = true;
+ uresp = udata->outbuf;
} else {
qp->is_user = false;
}
+ if (init->create_flags) {
+ err = -EOPNOTSUPP;
+ rxe_dbg_dev(rxe, "unsupported create_flags, err = %d", err);
+ goto err_out;
+ }
+
+ err = rxe_qp_chk_init(rxe, init);
+ if (err) {
+ rxe_dbg_dev(rxe, "bad init attr, err = %d", err);
+ goto err_out;
+ }
+
err = rxe_add_to_pool(&rxe->qp_pool, qp);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_dev(rxe, "unable to create qp, err = %d", err);
+ goto err_out;
+ }
err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
- if (err)
- goto qp_init;
+ if (err) {
+ rxe_dbg_qp(qp, "create qp failed, err = %d", err);
+ goto err_cleanup;
+ }
rxe_finalize(qp);
return 0;
-qp_init:
- rxe_cleanup(qp);
+err_cleanup:
+ cleanup_err = rxe_cleanup(qp);
+ if (cleanup_err)
+ rxe_err_qp(qp, "cleanup failed, err = %d", cleanup_err);
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
return err;
}
static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int mask, struct ib_udata *udata)
{
- int err;
struct rxe_dev *rxe = to_rdev(ibqp->device);
struct rxe_qp *qp = to_rqp(ibqp);
+ int err;
- if (mask & ~IB_QP_ATTR_STANDARD_BITS)
- return -EOPNOTSUPP;
+ if (mask & ~IB_QP_ATTR_STANDARD_BITS) {
+ err = -EOPNOTSUPP;
+ rxe_dbg_qp(qp, "unsupported mask = 0x%x, err = %d",
+ mask, err);
+ goto err_out;
+ }
err = rxe_qp_chk_attr(rxe, qp, attr, mask);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_qp(qp, "bad mask/attr, err = %d", err);
+ goto err_out;
+ }
err = rxe_qp_from_attr(qp, attr, mask, udata);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_qp(qp, "modify qp failed, err = %d", err);
+ goto err_out;
+ }
if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
@@ -455,6 +620,10 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->attr.dest_qp_num);
return 0;
+
+err_out:
+ rxe_err_qp(qp, "returned err = %d", err);
+ return err;
}
static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -471,41 +640,90 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct rxe_qp *qp = to_rqp(ibqp);
- int ret;
+ int err;
- ret = rxe_qp_chk_destroy(qp);
- if (ret)
- return ret;
+ err = rxe_qp_chk_destroy(qp);
+ if (err) {
+ rxe_dbg_qp(qp, "unable to destroy qp, err = %d", err);
+ goto err_out;
+ }
+
+ err = rxe_cleanup(qp);
+ if (err)
+ rxe_err_qp(qp, "cleanup failed, err = %d", err);
- rxe_cleanup(qp);
return 0;
+
+err_out:
+ rxe_err_qp(qp, "returned err = %d", err);
+ return err;
}
+/* send wr */
+
+/* sanity check incoming send work request */
static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
- unsigned int mask, unsigned int length)
+ unsigned int *maskp, unsigned int *lengthp)
{
int num_sge = ibwr->num_sge;
struct rxe_sq *sq = &qp->sq;
+ unsigned int mask = 0;
+ unsigned long length = 0;
+ int err = -EINVAL;
+ int i;
- if (unlikely(num_sge > sq->max_sge))
- return -EINVAL;
+ do {
+ mask = wr_opcode_mask(ibwr->opcode, qp);
+ if (!mask) {
+ rxe_err_qp(qp, "bad wr opcode for qp type");
+ break;
+ }
- if (unlikely(mask & WR_ATOMIC_MASK)) {
- if (length < 8)
- return -EINVAL;
+ if (num_sge > sq->max_sge) {
+ rxe_err_qp(qp, "num_sge > max_sge");
+ break;
+ }
- if (atomic_wr(ibwr)->remote_addr & 0x7)
- return -EINVAL;
- }
+ length = 0;
+ for (i = 0; i < ibwr->num_sge; i++)
+ length += ibwr->sg_list[i].length;
- if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
- (length > sq->max_inline)))
- return -EINVAL;
+ if (length > (1UL << 31)) {
+ rxe_err_qp(qp, "message length too long");
+ break;
+ }
- return 0;
+ if (mask & WR_ATOMIC_MASK) {
+ if (length != 8) {
+ rxe_err_qp(qp, "atomic length != 8");
+ break;
+ }
+ if (atomic_wr(ibwr)->remote_addr & 0x7) {
+ rxe_err_qp(qp, "misaligned atomic address");
+ break;
+ }
+ }
+ if (ibwr->send_flags & IB_SEND_INLINE) {
+ if (!(mask & WR_INLINE_MASK)) {
+ rxe_err_qp(qp, "opcode doesn't support inline data");
+ break;
+ }
+ if (length > sq->max_inline) {
+ rxe_err_qp(qp, "inline length too big");
+ break;
+ }
+ }
+
+ err = 0;
+ } while (0);
+
+ *maskp = mask;
+ *lengthp = (int)length;
+
+ return err;
}
-static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
+static int init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
const struct ib_send_wr *ibwr)
{
wr->wr_id = ibwr->wr_id;
@@ -521,8 +739,18 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
wr->wr.ud.ah_num = to_rah(ibah)->ah_num;
if (qp_type(qp) == IB_QPT_GSI)
wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
- if (wr->opcode == IB_WR_SEND_WITH_IMM)
+
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
wr->ex.imm_data = ibwr->ex.imm_data;
+ break;
+ case IB_WR_SEND:
+ break;
+ default:
+ rxe_err_qp(qp, "bad wr opcode %d for UD/GSI QP",
+ wr->opcode);
+ return -EINVAL;
+ }
} else {
switch (wr->opcode) {
case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -539,6 +767,11 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
case IB_WR_SEND_WITH_INV:
wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
+ wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
+ wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
+ break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
wr->wr.atomic.remote_addr =
@@ -550,16 +783,26 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
break;
case IB_WR_LOCAL_INV:
wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
- break;
+ break;
case IB_WR_REG_MR:
wr->wr.reg.mr = reg_wr(ibwr)->mr;
wr->wr.reg.key = reg_wr(ibwr)->key;
wr->wr.reg.access = reg_wr(ibwr)->access;
- break;
+ break;
+ case IB_WR_SEND:
+ case IB_WR_BIND_MW:
+ case IB_WR_FLUSH:
+ case IB_WR_ATOMIC_WRITE:
+ break;
default:
+ rxe_err_qp(qp, "unsupported wr opcode %d",
+ wr->opcode);
+ return -EINVAL;
break;
}
}
+
+ return 0;
}
static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
@@ -570,24 +813,27 @@ static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
int i;
for (i = 0; i < ibwr->num_sge; i++, sge++) {
- memcpy(p, (void *)(uintptr_t)sge->addr, sge->length);
+ memcpy(p, ib_virt_dma_to_page(sge->addr), sge->length);
p += sge->length;
}
}
-static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
+static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
unsigned int mask, unsigned int length,
struct rxe_send_wqe *wqe)
{
int num_sge = ibwr->num_sge;
+ int err;
- init_send_wr(qp, &wqe->wr, ibwr);
+ err = init_send_wr(qp, &wqe->wr, ibwr);
+ if (err)
+ return err;
/* local operation */
if (unlikely(mask & WR_LOCAL_OP_MASK)) {
wqe->mask = mask;
wqe->state = wqe_state_posted;
- return;
+ return 0;
}
if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
@@ -606,82 +852,62 @@ static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
wqe->dma.sge_offset = 0;
wqe->state = wqe_state_posted;
wqe->ssn = atomic_add_return(1, &qp->ssn);
+
+ return 0;
}
-static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
- unsigned int mask, u32 length)
+static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr)
{
int err;
struct rxe_sq *sq = &qp->sq;
struct rxe_send_wqe *send_wqe;
- unsigned long flags;
+ unsigned int mask;
+ unsigned int length;
int full;
- err = validate_send_wr(qp, ibwr, mask, length);
+ err = validate_send_wr(qp, ibwr, &mask, &length);
if (err)
return err;
- spin_lock_irqsave(&qp->sq.sq_lock, flags);
-
full = queue_full(sq->queue, QUEUE_TYPE_FROM_ULP);
-
if (unlikely(full)) {
- spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
+ rxe_err_qp(qp, "send queue full");
return -ENOMEM;
}
send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_FROM_ULP);
- init_send_wqe(qp, ibwr, mask, length, send_wqe);
+ err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
+ if (!err)
+ queue_advance_producer(sq->queue, QUEUE_TYPE_FROM_ULP);
- queue_advance_producer(sq->queue, QUEUE_TYPE_FROM_ULP);
-
- spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
-
- return 0;
+ return err;
}
-static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
+static int rxe_post_send_kernel(struct rxe_qp *qp,
+ const struct ib_send_wr *ibwr,
const struct ib_send_wr **bad_wr)
{
int err = 0;
- unsigned int mask;
- unsigned int length = 0;
- int i;
- struct ib_send_wr *next;
-
- while (wr) {
- mask = wr_opcode_mask(wr->opcode, qp);
- if (unlikely(!mask)) {
- err = -EINVAL;
- *bad_wr = wr;
- break;
- }
-
- if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
- !(mask & WR_INLINE_MASK))) {
- err = -EINVAL;
- *bad_wr = wr;
- break;
- }
-
- next = wr->next;
-
- length = 0;
- for (i = 0; i < wr->num_sge; i++)
- length += wr->sg_list[i].length;
-
- err = post_one_send(qp, wr, mask, length);
+ unsigned long flags;
+ spin_lock_irqsave(&qp->sq.sq_lock, flags);
+ while (ibwr) {
+ err = post_one_send(qp, ibwr);
if (err) {
- *bad_wr = wr;
+ *bad_wr = ibwr;
break;
}
- wr = next;
+ ibwr = ibwr->next;
}
+ spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
- rxe_sched_task(&qp->req.task);
- if (unlikely(qp->req.state == QP_STATE_ERROR))
+ if (!err)
+ rxe_sched_task(&qp->req.task);
+
+ spin_lock_bh(&qp->state_lock);
+ if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->comp.task);
+ spin_unlock_bh(&qp->state_lock);
return err;
}
@@ -690,23 +916,88 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
struct rxe_qp *qp = to_rqp(ibqp);
+ int err;
- if (unlikely(!qp->valid)) {
- *bad_wr = wr;
+ spin_lock_bh(&qp->state_lock);
+ /* caller has already called destroy_qp */
+ if (WARN_ON_ONCE(!qp->valid)) {
+ spin_unlock_bh(&qp->state_lock);
+ rxe_err_qp(qp, "qp has been destroyed");
return -EINVAL;
}
- if (unlikely(qp->req.state < QP_STATE_READY)) {
+ if (unlikely(qp_state(qp) < IB_QPS_RTS)) {
+ spin_unlock_bh(&qp->state_lock);
*bad_wr = wr;
+ rxe_err_qp(qp, "qp not ready to send");
return -EINVAL;
}
+ spin_unlock_bh(&qp->state_lock);
if (qp->is_user) {
/* Utilize process context to do protocol processing */
rxe_run_task(&qp->req.task);
- return 0;
- } else
- return rxe_post_send_kernel(qp, wr, bad_wr);
+ } else {
+ err = rxe_post_send_kernel(qp, wr, bad_wr);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* recv wr */
+static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
+{
+ int i;
+ unsigned long length;
+ struct rxe_recv_wqe *recv_wqe;
+ int num_sge = ibwr->num_sge;
+ int full;
+ int err;
+
+ full = queue_full(rq->queue, QUEUE_TYPE_FROM_ULP);
+ if (unlikely(full)) {
+ err = -ENOMEM;
+ rxe_dbg("queue full");
+ goto err_out;
+ }
+
+ if (unlikely(num_sge > rq->max_sge)) {
+ err = -EINVAL;
+ rxe_dbg("bad num_sge > max_sge");
+ goto err_out;
+ }
+
+ length = 0;
+ for (i = 0; i < num_sge; i++)
+ length += ibwr->sg_list[i].length;
+
+ /* IBA max message size is 2^31 */
+ if (length >= (1UL<<31)) {
+ err = -EINVAL;
+ rxe_dbg("message length too long");
+ goto err_out;
+ }
+
+ recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_FROM_ULP);
+
+ recv_wqe->wr_id = ibwr->wr_id;
+ recv_wqe->dma.length = length;
+ recv_wqe->dma.resid = length;
+ recv_wqe->dma.num_sge = num_sge;
+ recv_wqe->dma.cur_sge = 0;
+ recv_wqe->dma.sge_offset = 0;
+ memcpy(recv_wqe->dma.sge, ibwr->sg_list,
+ num_sge * sizeof(struct ib_sge));
+
+ queue_advance_producer(rq->queue, QUEUE_TYPE_FROM_ULP);
+
+ return 0;
+
+err_out:
+ rxe_dbg("returned err = %d", err);
+ return err;
}
static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
@@ -717,13 +1008,26 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
struct rxe_rq *rq = &qp->rq;
unsigned long flags;
- if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
+ spin_lock_bh(&qp->state_lock);
+ /* caller has already called destroy_qp */
+ if (WARN_ON_ONCE(!qp->valid)) {
+ spin_unlock_bh(&qp->state_lock);
+ rxe_err_qp(qp, "qp has been destroyed");
+ return -EINVAL;
+ }
+
+ /* see C10-97.2.1 */
+ if (unlikely((qp_state(qp) < IB_QPS_INIT))) {
+ spin_unlock_bh(&qp->state_lock);
*bad_wr = wr;
+ rxe_dbg_qp(qp, "qp not ready to post recv");
return -EINVAL;
}
+ spin_unlock_bh(&qp->state_lock);
if (unlikely(qp->srq)) {
*bad_wr = wr;
+ rxe_dbg_qp(qp, "qp has srq, use post_srq_recv instead");
return -EINVAL;
}
@@ -740,76 +1044,102 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
spin_unlock_irqrestore(&rq->producer_lock, flags);
- if (qp->resp.state == QP_STATE_ERROR)
+ spin_lock_bh(&qp->state_lock);
+ if (qp_state(qp) == IB_QPS_ERR)
rxe_sched_task(&qp->resp.task);
+ spin_unlock_bh(&qp->state_lock);
return err;
}
+/* cq */
static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_udata *udata)
{
- int err;
struct ib_device *dev = ibcq->device;
struct rxe_dev *rxe = to_rdev(dev);
struct rxe_cq *cq = to_rcq(ibcq);
struct rxe_create_cq_resp __user *uresp = NULL;
+ int err, cleanup_err;
if (udata) {
- if (udata->outlen < sizeof(*uresp))
- return -EINVAL;
+ if (udata->outlen < sizeof(*uresp)) {
+ err = -EINVAL;
+ rxe_dbg_dev(rxe, "malformed udata, err = %d", err);
+ goto err_out;
+ }
uresp = udata->outbuf;
}
- if (attr->flags)
- return -EOPNOTSUPP;
+ if (attr->flags) {
+ err = -EOPNOTSUPP;
+ rxe_dbg_dev(rxe, "bad attr->flags, err = %d", err);
+ goto err_out;
+ }
err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_dev(rxe, "bad init attributes, err = %d", err);
+ goto err_out;
+ }
+
+ err = rxe_add_to_pool(&rxe->cq_pool, cq);
+ if (err) {
+ rxe_dbg_dev(rxe, "unable to create cq, err = %d", err);
+ goto err_out;
+ }
err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
uresp);
- if (err)
- return err;
-
- return rxe_add_to_pool(&rxe->cq_pool, cq);
-}
-
-static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
-{
- struct rxe_cq *cq = to_rcq(ibcq);
-
- /* See IBA C11-17: The CI shall return an error if this Verb is
- * invoked while a Work Queue is still associated with the CQ.
- */
- if (atomic_read(&cq->num_wq))
- return -EINVAL;
-
- rxe_cq_disable(cq);
+ if (err) {
+ rxe_dbg_cq(cq, "create cq failed, err = %d", err);
+ goto err_cleanup;
+ }
- rxe_cleanup(cq);
return 0;
+
+err_cleanup:
+ cleanup_err = rxe_cleanup(cq);
+ if (cleanup_err)
+ rxe_err_cq(cq, "cleanup failed, err = %d", cleanup_err);
+err_out:
+ rxe_err_dev(rxe, "returned err = %d", err);
+ return err;
}
static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
- int err;
struct rxe_cq *cq = to_rcq(ibcq);
struct rxe_dev *rxe = to_rdev(ibcq->device);
struct rxe_resize_cq_resp __user *uresp = NULL;
+ int err;
if (udata) {
- if (udata->outlen < sizeof(*uresp))
- return -EINVAL;
+ if (udata->outlen < sizeof(*uresp)) {
+ err = -EINVAL;
+ rxe_dbg_cq(cq, "malformed udata");
+ goto err_out;
+ }
uresp = udata->outbuf;
}
err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
- if (err)
- return err;
+ if (err) {
+ rxe_dbg_cq(cq, "bad attr, err = %d", err);
+ goto err_out;
+ }
- return rxe_cq_resize_queue(cq, cqe, uresp, udata);
+ err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
+ if (err) {
+ rxe_dbg_cq(cq, "resize cq failed, err = %d", err);
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ rxe_err_cq(cq, "returned err = %d", err);
+ return err;
}
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
@@ -823,7 +1153,7 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
for (i = 0; i < num_entries; i++) {
cqe = queue_head(cq->queue, QUEUE_TYPE_TO_ULP);
if (!cqe)
- break;
+ break; /* queue empty */
memcpy(wc++, &cqe->ibwc, sizeof(*wc));
queue_advance_consumer(cq->queue, QUEUE_TYPE_TO_ULP);
@@ -864,6 +1194,32 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
return ret;
}
+static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct rxe_cq *cq = to_rcq(ibcq);
+ int err;
+
+ /* See IBA C11-17: The CI shall return an error if this Verb is
+ * invoked while a Work Queue is still associated with the CQ.
+ */
+ if (atomic_read(&cq->num_wq)) {
+ err = -EINVAL;
+ rxe_dbg_cq(cq, "still in use");
+ goto err_out;
+ }
+
+ err = rxe_cleanup(cq);
+ if (err)
+ rxe_err_cq(cq, "cleanup failed, err = %d", err);
+
+ return 0;
+
+err_out:
+ rxe_err_cq(cq, "returned err = %d", err);
+ return err;
+}
+
+/* mr */
static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
struct rxe_dev *rxe = to_rdev(ibpd->device);
@@ -872,14 +1228,14 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
int err;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr) {
- err = -ENOMEM;
- goto err_out;
- }
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
err = rxe_add_to_pool(&rxe->mr_pool, mr);
- if (err)
+ if (err) {
+ rxe_dbg_dev(rxe, "unable to create mr");
goto err_free;
+ }
rxe_get(pd);
mr->ibmr.pd = ibpd;
@@ -891,47 +1247,49 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
err_free:
kfree(mr);
-err_out:
+ rxe_err_pd(pd, "returned err = %d", err);
return ERR_PTR(err);
}
-static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
- u64 start,
- u64 length,
- u64 iova,
- int access, struct ib_udata *udata)
+static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
+ u64 length, u64 iova, int access,
+ struct ib_udata *udata)
{
- int err;
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
struct rxe_mr *mr;
+ int err, cleanup_err;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr) {
- err = -ENOMEM;
- goto err_out;
- }
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
err = rxe_add_to_pool(&rxe->mr_pool, mr);
- if (err)
+ if (err) {
+ rxe_dbg_pd(pd, "unable to create mr");
goto err_free;
+ }
rxe_get(pd);
mr->ibmr.pd = ibpd;
mr->ibmr.device = ibpd->device;
err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
- if (err)
+ if (err) {
+ rxe_dbg_mr(mr, "reg_user_mr failed, err = %d", err);
goto err_cleanup;
+ }
rxe_finalize(mr);
return &mr->ibmr;
err_cleanup:
- rxe_cleanup(mr);
+ cleanup_err = rxe_cleanup(mr);
+ if (cleanup_err)
+ rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err);
err_free:
kfree(mr);
-err_out:
+ rxe_err_pd(pd, "returned err = %d", err);
return ERR_PTR(err);
}
@@ -941,17 +1299,19 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
struct rxe_mr *mr;
- int err;
+ int err, cleanup_err;
- if (mr_type != IB_MR_TYPE_MEM_REG)
- return ERR_PTR(-EINVAL);
-
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr) {
- err = -ENOMEM;
+ if (mr_type != IB_MR_TYPE_MEM_REG) {
+ err = -EINVAL;
+ rxe_dbg_pd(pd, "mr type %d not supported, err = %d",
+ mr_type, err);
goto err_out;
}
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
err = rxe_add_to_pool(&rxe->mr_pool, mr);
if (err)
goto err_free;
@@ -961,20 +1321,49 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
mr->ibmr.device = ibpd->device;
err = rxe_mr_init_fast(max_num_sg, mr);
- if (err)
+ if (err) {
+ rxe_dbg_mr(mr, "alloc_mr failed, err = %d", err);
goto err_cleanup;
+ }
rxe_finalize(mr);
return &mr->ibmr;
err_cleanup:
- rxe_cleanup(mr);
+ cleanup_err = rxe_cleanup(mr);
+ if (cleanup_err)
+ rxe_err_mr(mr, "cleanup failed, err = %d", err);
err_free:
kfree(mr);
err_out:
+ rxe_err_pd(pd, "returned err = %d", err);
return ERR_PTR(err);
}
+static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct rxe_mr *mr = to_rmr(ibmr);
+ int err, cleanup_err;
+
+ /* See IBA 10.6.7.2.6 */
+ if (atomic_read(&mr->num_mw) > 0) {
+ err = -EINVAL;
+ rxe_dbg_mr(mr, "mr has mw's bound");
+ goto err_out;
+ }
+
+ cleanup_err = rxe_cleanup(mr);
+ if (cleanup_err)
+ rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err);
+
+ kfree_rcu(mr);
+ return 0;
+
+err_out:
+ rxe_err_mr(mr, "returned err = %d", err);
+ return err;
+}
+
static ssize_t parent_show(struct device *device,
struct device_attribute *attr, char *buf)
{
@@ -1095,7 +1484,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
err = ib_register_device(dev, ibdev_name, NULL);
if (err)
- rxe_dbg(rxe, "failed with error %d\n", err);
+ rxe_dbg_dev(rxe, "failed with error %d\n", err);
/*
* Note that rxe may be invalid at this point if another thread
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index c269ae2a3224..26a20f088692 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -63,9 +63,7 @@ struct rxe_cq {
struct rxe_queue *queue;
spinlock_t cq_lock;
u8 notify;
- bool is_dying;
bool is_user;
- struct tasklet_struct comp_task;
atomic_t num_wq;
};
@@ -104,17 +102,7 @@ struct rxe_srq {
int error;
};
-enum rxe_qp_state {
- QP_STATE_RESET,
- QP_STATE_INIT,
- QP_STATE_READY,
- QP_STATE_DRAIN, /* req only */
- QP_STATE_DRAINED, /* req only */
- QP_STATE_ERROR
-};
-
struct rxe_req_info {
- enum rxe_qp_state state;
int wqe_index;
u32 psn;
int opcode;
@@ -129,7 +117,6 @@ struct rxe_req_info {
};
struct rxe_comp_info {
- enum rxe_qp_state state;
u32 psn;
int opcode;
int timeout;
@@ -175,7 +162,6 @@ struct resp_res {
};
struct rxe_resp_info {
- enum rxe_qp_state state;
u32 msn;
u32 psn;
u32 ack_psn;
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index dacc174604bf..65b5cda5457b 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -437,9 +437,6 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
dev_dbg(&netdev->dev, "siw: event %lu\n", event);
- if (dev_net(netdev) != &init_net)
- return NOTIFY_OK;
-
base_dev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_SIW);
if (!base_dev)
return NOTIFY_OK;
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index fd721cc19682..58bbf738e4e5 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -139,7 +139,7 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx,
break;
bytes = min(bytes, len);
- if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) ==
+ if (siw_rx_kva(srx, ib_virt_dma_to_ptr(buf_addr), bytes) ==
bytes) {
copied += bytes;
offset += bytes;
@@ -487,7 +487,7 @@ int siw_proc_send(struct siw_qp *qp)
mem_p = *mem;
if (mem_p->mem_obj == NULL)
rv = siw_rx_kva(srx,
- (void *)(uintptr_t)(sge->laddr + frx->sge_off),
+ ib_virt_dma_to_ptr(sge->laddr + frx->sge_off),
sge_bytes);
else if (!mem_p->is_pbl)
rv = siw_rx_umem(srx, mem_p->umem,
@@ -852,7 +852,7 @@ int siw_proc_rresp(struct siw_qp *qp)
if (mem_p->mem_obj == NULL)
rv = siw_rx_kva(srx,
- (void *)(uintptr_t)(sge->laddr + wqe->processed),
+ ib_virt_dma_to_ptr(sge->laddr + wqe->processed),
bytes);
else if (!mem_p->is_pbl)
rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 05052b49107f..4b292e0504f1 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx)
dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx);
if (paddr)
- return virt_to_page((void *)(uintptr_t)paddr);
+ return ib_virt_dma_to_page(paddr);
return NULL;
}
@@ -56,8 +56,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
if (!mem->mem_obj) {
/* Kernel client using kva */
- memcpy(paddr,
- (const void *)(uintptr_t)sge->laddr, bytes);
+ memcpy(paddr, ib_virt_dma_to_ptr(sge->laddr), bytes);
} else if (c_tx->in_syscall) {
if (copy_from_user(paddr, u64_to_user_ptr(sge->laddr),
bytes))
@@ -477,7 +476,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
* or memory region with assigned kernel buffer
*/
iov[seg].iov_base =
- (void *)(uintptr_t)(sge->laddr + sge_off);
+ ib_virt_dma_to_ptr(sge->laddr + sge_off);
iov[seg].iov_len = sge_len;
if (do_crc)
@@ -537,19 +536,13 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
* Cast to an uintptr_t to preserve all 64 bits
* in sge->laddr.
*/
- uintptr_t va = (uintptr_t)(sge->laddr + sge_off);
+ u64 va = sge->laddr + sge_off;
- /*
- * virt_to_page() takes a (void *) pointer
- * so cast to a (void *) meaning it will be 64
- * bits on a 64 bit platform and 32 bits on a
- * 32 bit platform.
- */
- page_array[seg] = virt_to_page((void *)(va & PAGE_MASK));
+ page_array[seg] = ib_virt_dma_to_page(va);
if (do_crc)
crypto_shash_update(
c_tx->mpa_crc_hd,
- (void *)va,
+ ib_virt_dma_to_ptr(va),
plen);
}
@@ -558,7 +551,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
data_len -= plen;
fp_off = 0;
- if (++seg > (int)MAX_ARRAY) {
+ if (++seg >= (int)MAX_ARRAY) {
siw_dbg_qp(tx_qp(c_tx), "to many fragments\n");
siw_unmap_pages(iov, kmap_mask, seg-1);
wqe->processed -= c_tx->bytes_unsent;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 906fde1a2a0d..398ec13db624 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -660,7 +660,7 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
bytes = -EINVAL;
break;
}
- memcpy(kbuf, (void *)(uintptr_t)core_sge->addr,
+ memcpy(kbuf, ib_virt_dma_to_ptr(core_sge->addr),
core_sge->length);
kbuf += core_sge->length;
@@ -1523,7 +1523,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle,
}
siw_dbg_mem(mem,
"sge[%d], size %u, addr 0x%p, total %lu\n",
- i, pble->size, (void *)(uintptr_t)pble->addr,
+ i, pble->size, ib_virt_dma_to_ptr(pble->addr),
pbl_size);
}
rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page);