diff options
author | Mike Marciniszyn <mike.marciniszyn@intel.com> | 2016-02-14 12:10:04 -0800 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-03-10 20:38:07 -0500 |
commit | 46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7 (patch) | |
tree | 077ea38ac2f7fd75c1334deadcc141ad6753a009 /drivers/infiniband/hw/qib/qib_rc.c | |
parent | 20f333b61300fa658952713ca9b8b4b72bbaed9f (diff) | |
download | linux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.tar.gz linux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.tar.bz2 linux-46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7.zip |
IB/qib, staging/rdma/hfi1: add s_hlock for use in post send
This patch adds an additional lock, s_hlock, to reduce contention on the s_lock.
The new lock is used in post_send() so that post_send() is not
serialized with the send engine and other send-related processing.
To do this, s_next_psn is now maintained in post_send(), and the
post_send()-related fields are moved to a new cache line. An
s_avail value is also maintained for post_send() to mitigate trading cache
lines with the send engine. The lock is released and reacquired around
handing the just-built packet to the egress mechanism.
Reviewed-by: Jubin John <jubin.john@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Harish Chegondi <harish.chegondi@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/qib/qib_rc.c')
-rw-r--r-- | drivers/infiniband/hw/qib/qib_rc.c | 44 |
1 files changed, 9 insertions, 35 deletions
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index ce886b2ade74..9088e26d3ac8 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -226,6 +226,8 @@ bail: * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP * + * Assumes the s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int qib_make_rc_req(struct rvt_qp *qp) @@ -241,7 +243,6 @@ int qib_make_rc_req(struct rvt_qp *qp) u32 bth2; u32 pmtu = qp->pmtu; char newreq; - unsigned long flags; int ret = 0; int delta; @@ -249,12 +250,6 @@ int qib_make_rc_req(struct rvt_qp *qp) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &priv->s_hdr->u.l.oth; - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout resends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && qib_make_rc_ack(dev, qp, ohdr, pmtu)) @@ -264,7 +259,8 @@ int qib_make_rc_req(struct rvt_qp *qp) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. 
*/ if (atomic_read(&priv->s_dma_busy)) { @@ -321,8 +317,8 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } - wqe->psn = qp->s_next_psn; newreq = 1; + qp->s_psn = wqe->psn; } /* * Note that we have to be careful not to modify the @@ -341,9 +337,7 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(SEND_FIRST); len = pmtu; break; @@ -381,9 +375,7 @@ int qib_make_rc_req(struct rvt_qp *qp) cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(RDMA_WRITE_FIRST); len = pmtu; break; @@ -418,13 +410,6 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. 
- */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = @@ -456,7 +441,6 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - wqe->lpsn = wqe->psn; } if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); @@ -499,11 +483,8 @@ int qib_make_rc_req(struct rvt_qp *qp) } if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_psn = wqe->lpsn + 1; - else { + else qp->s_psn++; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } break; case OP(RDMA_READ_RESPONSE_FIRST): @@ -523,8 +504,6 @@ int qib_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = qp->s_psn++ & QIB_PSN_MASK; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -564,8 +543,6 @@ int qib_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = qp->s_psn++ & QIB_PSN_MASK; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -630,13 +607,9 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_cur_size = len; qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -1454,7 +1427,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, goto ack_done; /* Ignore invalid responses. */ - if (qib_cmp24(psn, qp->s_next_psn) >= 0) + smp_read_barrier_depends(); /* see post_one_send */ + if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0) goto ack_done; /* Ignore duplicate responses. */ |