summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/hfi1/tid_rdma.h
diff options
context:
space:
mode:
authorKaike Wan <kaike.wan@intel.com>2019-01-23 19:30:07 -0800
committerDoug Ledford <dledford@redhat.com>2019-02-05 17:53:55 -0500
commit838b6fd2d9ca29998869e4d1ecf4566efe807666 (patch)
treee55c782966d8711adc080ec9c865863f56909107 /drivers/infiniband/hw/hfi1/tid_rdma.h
parent37356e78328186814e994e0ad1a1cfd6a142bef4 (diff)
downloadlinux-stable-838b6fd2d9ca29998869e4d1ecf4566efe807666.tar.gz
linux-stable-838b6fd2d9ca29998869e4d1ecf4566efe807666.tar.bz2
linux-stable-838b6fd2d9ca29998869e4d1ecf4566efe807666.zip
IB/hfi1: TID RDMA RcvArray programming and TID allocation
TID entries are used by hfi1 hardware to receive data payload from incoming packets directly into a user buffer and thus avoid data copying by software. This patch implements the functions for TID allocation, freeing, and programming TID RcvArray entries in hardware for kernel clients. TID entries are managed via lists of TID groups similar to PSM. Furthermore, to track TID resource allocation for each request, software flows are also allocated and freed as needed. Since software flows consume large amount of memory for tracking TID allocation and freeing, it is generally desirable to allocate them dynamically in the send queue and only for TID RDMA requests, but pre-allocate them for receive queue because the send queue could have thousands of entries while the receive queue has only a limited number of entries. Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Kaike Wan <kaike.wan@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1/tid_rdma.h')
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.h101
1 files changed, 101 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 3bc0aaf9568f..524baf8c8fac 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -6,7 +6,16 @@
#ifndef HFI1_TID_RDMA_H
#define HFI1_TID_RDMA_H
+#include <linux/circ_buf.h>
+#include "common.h"
+
+/* Add a convenience helper */
+#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
+#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
+#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
+
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
+#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
struct tid_rdma_params {
struct rcu_head rcu_head;
@@ -36,6 +45,81 @@ struct tid_flow_state {
u8 flags;
};
+struct tid_rdma_request {
+ struct rvt_qp *qp;
+ struct hfi1_ctxtdata *rcd;
+ union {
+ struct rvt_swqe *swqe;
+ struct rvt_ack_entry *ack;
+ } e;
+
+ struct tid_rdma_flow *flows; /* array of tid flows */
+ u16 n_flows; /* size of the flow buffer window */
+ u16 setup_head; /* flow index we are setting up */
+ u16 clear_tail; /* flow index we are clearing */
+ u16 flow_idx; /* flow index most recently set up */
+
+ u32 seg_len;
+
+ u32 isge; /* index of "current" sge */
+};
+
+/*
+ * When header suppression is used, PSNs associated with a "flow" are
+ * relevant (and not the PSNs maintained by verbs). Track per-flow
+ * PSNs here for a TID RDMA segment.
+ *
+ */
+struct flow_state {
+ u32 flags;
+ u32 resp_ib_psn; /* The IB PSN of the response for this flow */
+ u32 generation; /* generation of flow */
+ u32 spsn; /* starting PSN in TID space */
+ u32 lpsn; /* last PSN in TID space */
+ u32 r_next_psn; /* next PSN to be received (in TID space) */
+};
+
+struct tid_rdma_pageset {
+ dma_addr_t addr : 48; /* Only needed for the first page */
+ u8 idx: 8;
+ u8 count : 7;
+ u8 mapped: 1;
+};
+
+/**
+ * kern_tid_node - used for managing TID's in TID groups
+ *
+ * @grp_idx: rcd relative index to tid_group
+ * @map: grp->map captured prior to programming this TID group in HW
+ * @cnt: Only @cnt of available group entries are actually programmed
+ */
+struct kern_tid_node {
+ struct tid_group *grp;
+ u8 map;
+ u8 cnt;
+};
+
+/* Overall info for a TID RDMA segment */
+struct tid_rdma_flow {
+ /*
+ * While a TID RDMA segment is being transferred, it uses a QP number
+ * from the "KDETH section of QP numbers" (which is different from the
+ * QP number that originated the request). Bits 11-15 of these QP
+ * numbers identify the "TID flow" for the segment.
+ */
+ struct flow_state flow_state;
+ struct tid_rdma_request *req;
+ u32 length;
+ u8 tnode_cnt;
+ u8 tidcnt;
+ u8 idx;
+ u8 npagesets;
+ u8 npkts;
+ struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
+ struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
+ u32 tid_entry[TID_RDMA_MAX_PAGES];
+};
+
bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
@@ -43,6 +127,23 @@ void tid_rdma_conn_error(struct rvt_qp *qp);
void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);
int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
+int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
+ struct rvt_sge_state *ss, bool *last);
+int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req);
+void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
+void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
+/**
+ * trdma_clean_swqe - clean flows for swqe if large send queue
+ * @qp: the qp
+ * @wqe: the send wqe
+ */
+static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ if (!wqe->priv)
+ return;
+ __trdma_clean_swqe(qp, wqe);
+}
int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
struct ib_qp_init_attr *init_attr);