summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael J. Ruhl <michael.j.ruhl@intel.com>2018-09-10 09:49:27 -0700
committerJason Gunthorpe <jgg@mellanox.com>2018-09-11 09:55:02 -0600
commit0b79b27748cbec221e1ceabf63578198602bf01d (patch)
treea62ba4181d233bc314b46b3d12f62fe68e34d5b4
parent3e5d60bcc8a42bfd0c888a0cf52a5a7e8398677d (diff)
downloadlinux-stable-0b79b27748cbec221e1ceabf63578198602bf01d.tar.gz
linux-stable-0b79b27748cbec221e1ceabf63578198602bf01d.tar.bz2
linux-stable-0b79b27748cbec221e1ceabf63578198602bf01d.zip
IB/{hfi1, qib, rdmavt}: Schedule multi RC/UC packets instead of posting
The post_send() path determines if it should post directly or, schedule the post for later. The current logic is: if the swqe ring is empty or (for hfi1) wqe->length <= piothreshold post the send else schedule This can allow large requests to call the send engine directly. Large requests can potentially produce a large number of packets prior to returning to the caller, blocking the caller from posting more requests, and allowing better parallel processing. Allow the driver(s) more say in this logic (pass call_send to the driver, rather than examining a return value). Update hfi1/qib logic to schedule the send engine if an RC or UC message is larger than the QP MTU size. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c12
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h3
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c17
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h3
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c14
-rw-r--r--include/rdma/rdma_vt.h10
6 files changed, 32 insertions, 27 deletions
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9b1e84a6b1cc..54d9ff171059 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -285,17 +285,13 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
* hfi1_check_send_wqe - validate wqe
* @qp - The qp
* @wqe - The built wqe
- *
- * validate wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * @call_send - Determine if the send should be posted or scheduled.
*
* Returns 0 on success, -EINVAL on failure
*
*/
int hfi1_check_send_wqe(struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+ struct rvt_swqe *wqe, bool *call_send)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct rvt_ah *ah;
@@ -305,6 +301,8 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
+ if (wqe->length > qp->pmtu)
+ *call_send = false;
break;
case IB_QPT_SMI:
ah = ibah_to_rvtah(wqe->ud_wr.ah);
@@ -321,7 +319,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
default:
break;
}
- return wqe->length <= piothreshold;
+ return 0;
}
/**
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index a4d06502f06d..269ec338581b 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -343,7 +343,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 344e401915f7..a81905df2d0f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
* qib_check_send_wqe - validate wr/wqe
* @qp - The qp
* @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled
*
- * validate wr/wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
- *
- * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ * Returns 0 on success, -EINVAL on failure
*/
int qib_check_send_wqe(struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+ struct rvt_swqe *wqe, bool *call_send)
{
struct rvt_ah *ah;
- int ret = 0;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
+ if (wqe->length > qp->pmtu)
+ *call_send = false;
break;
case IB_QPT_SMI:
case IB_QPT_GSI:
@@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp,
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
/* progress hint */
- ret = 1;
+ *call_send = true;
break;
default:
break;
}
- return ret;
+ return 0;
}
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 666613eef88f..3d7b744ae8fb 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -303,7 +303,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
-int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 5ce403c6cddb..a9b7d7ff32ee 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1718,7 +1718,7 @@ static inline int rvt_qp_is_avail(
*/
static int rvt_post_one_wr(struct rvt_qp *qp,
const struct ib_send_wr *wr,
- int *call_send)
+ bool *call_send)
{
struct rvt_swqe *wqe;
u32 next;
@@ -1825,11 +1825,9 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
/* general part of wqe valid - allow for driver checks */
if (rdi->driver_f.check_send_wqe) {
- ret = rdi->driver_f.check_send_wqe(qp, wqe);
+ ret = rdi->driver_f.check_send_wqe(qp, wqe, call_send);
if (ret < 0)
goto bail_inval_free;
- if (ret)
- *call_send = ret;
}
log_pmtu = qp->log_pmtu;
@@ -1897,7 +1895,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
unsigned long flags = 0;
- int call_send;
+ bool call_send;
unsigned nreq = 0;
int err = 0;
@@ -1930,7 +1928,11 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
bail:
spin_unlock_irqrestore(&qp->s_hlock, flags);
if (nreq) {
- if (call_send)
+ /*
+ * Only call do_send if there is exactly one packet, and the
+ * driver said it was ok.
+ */
+ if (nreq == 1 && call_send)
rdi->driver_f.do_send(qp);
else
rdi->driver_f.schedule_send_no_lock(qp);
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e79229a0cf01..e32facdd9fd3 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -214,8 +214,14 @@ struct rvt_driver_provided {
void (*schedule_send)(struct rvt_qp *qp);
void (*schedule_send_no_lock)(struct rvt_qp *qp);
- /* Driver specific work request checking */
- int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+ /*
+ * Validate the wqe. This needs to be done prior to inserting the
+ * wqe into the ring, but after the wqe has been set up. Allow for
+ * driver specific work request checking by providing a callback.
+ * call_send indicates if the wqe should be posted or scheduled.
+ */
+ int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
/*
* Sometimes rdmavt needs to kick the driver's send progress. That is