Diffstat (limited to 'drivers/infiniband/hw/mana')
-rw-r--r-- | drivers/infiniband/hw/mana/Makefile       |   2
-rw-r--r-- | drivers/infiniband/hw/mana/ah.c           |  58
-rw-r--r-- | drivers/infiniband/hw/mana/counters.c     | 105
-rw-r--r-- | drivers/infiniband/hw/mana/counters.h     |  44
-rw-r--r-- | drivers/infiniband/hw/mana/cq.c           | 228
-rw-r--r-- | drivers/infiniband/hw/mana/device.c       |  82
-rw-r--r-- | drivers/infiniband/hw/mana/main.c         | 103
-rw-r--r-- | drivers/infiniband/hw/mana/mana_ib.h      | 210
-rw-r--r-- | drivers/infiniband/hw/mana/mr.c           | 105
-rw-r--r-- | drivers/infiniband/hw/mana/qp.c           | 245
-rw-r--r-- | drivers/infiniband/hw/mana/shadow_queue.h | 115
-rw-r--r-- | drivers/infiniband/hw/mana/wr.c           | 168
12 files changed, 1420 insertions, 45 deletions
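The patch below wires three new objects into the driver build (ah.o, wr.o, counters.o) and extends the existing CQ/QP/MR paths so that UD and GSI QPs can be created and driven entirely from kernel context: work requests are posted through the new post_send/post_recv verbs and their completions are matched back to the caller through per-QP shadow queues drained in mana_ib_poll_cq(). The standalone sketch that follows models only the index bookkeeping of that shadow queue (producer, consumer and next-to-complete counters over a fixed-length ring) as introduced in shadow_queue.h; the struct fields mirror the patch, while the slot() helper and the main() harness are illustrative user-space code, not driver code.

/* Minimal user-space model of the shadow-queue bookkeeping added in
 * shadow_queue.h: prod_idx advances when a WR is posted, next_to_complete_idx
 * advances when the hardware CQE for that WR is seen, and cons_idx advances
 * when the completion is handed to the caller in poll_cq.  All three are
 * unmasked 64-bit counters; the ring slot for an index is (index % length).
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct shadow_queue {
	uint64_t prod_idx;		/* incremented on WR posting */
	uint64_t cons_idx;		/* incremented when reported to the caller */
	uint64_t next_to_complete_idx;	/* incremented when the HW CQE arrives */
	uint32_t length;		/* ring size in entries */
	uint32_t stride;		/* bytes per entry */
	void *buffer;
};

static void *slot(struct shadow_queue *q, uint64_t unmasked)
{
	return (uint8_t *)q->buffer + (unmasked % q->length) * q->stride;
}

int main(void)
{
	struct shadow_queue q = { .length = 4, .stride = 16 };

	q.buffer = calloc(q.length, q.stride);
	if (!q.buffer)
		return 1;

	/* post two WRs: the producer index runs ahead */
	q.prod_idx += 2;

	/* hardware completes one of them */
	if (q.next_to_complete_idx != q.prod_idx)
		q.next_to_complete_idx++;

	/* poll_cq hands exactly the completed entries to the caller */
	while (q.cons_idx != q.next_to_complete_idx) {
		printf("reporting WR in slot %p\n", slot(&q, q.cons_idx));
		q.cons_idx++;
	}

	free(q.buffer);
	return 0;
}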
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile index 88655fe5e398..921c05e08b11 100644 --- a/drivers/infiniband/hw/mana/Makefile +++ b/drivers/infiniband/hw/mana/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o -mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o +mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c new file mode 100644 index 000000000000..f56952eebbaa --- /dev/null +++ b/drivers/infiniband/hw/mana/ah.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev); + struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah); + struct rdma_ah_attr *ah_attr = attr->ah_attr; + const struct ib_global_route *grh; + enum rdma_network_type ntype; + + if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE || + !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) + return -EINVAL; + + if (udata) + return -EINVAL; + + ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle); + if (!ah->av) + return -ENOMEM; + + grh = rdma_ah_read_grh(ah_attr); + ntype = rdma_gid_attr_network_type(grh->sgid_attr); + + copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN); + ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label); + ah->av->hop_limit = grh->hop_limit; + ah->av->dscp = (grh->traffic_class >> 2) & 0x3f; + ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6); + + if (ah->av->is_ipv6) { + copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16); + copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16); + } else { + ah->av->dest_ip[10] = 0xFF; + ah->av->dest_ip[11] = 0xFF; + copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4); + copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4); + } + + return 0; +} + +int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags) +{ + struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev); + struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah); + + dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle); + + return 0; +} diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c new file mode 100644 index 000000000000..e533ce21013d --- /dev/null +++ b/drivers/infiniband/hw/mana/counters.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Microsoft Corporation. All rights reserved. 
+ */ + +#include "counters.h" + +static const struct rdma_stat_desc mana_ib_port_stats_desc[] = { + [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout", + [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak", + [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak", + [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak", + [MANA_IB_RESPONDER_OOS].name = "responder_oos", + [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request", + [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak", + [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch", + [MANA_IB_NAK_INV_REQ].name = "nak_inv_req", + [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error", + [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error", + [MANA_IB_NAK_INV_READ].name = "nak_inv_read", + [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error", + [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error", + [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error", + [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error", + [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe", + [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error", + [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded", + [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded", + [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error", + [MANA_IB_RECEIVED_CNPS].name = "received_cnps", + [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested", + [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events", + [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered", + [MANA_IB_CURRENT_RATE].name = "current_rate", +}; + +struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num) +{ + return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc, + ARRAY_SIZE(mana_ib_port_stats_desc), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port_num, int index) +{ + struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, + ib_dev); + struct mana_rnic_query_vf_cntrs_resp resp = {}; + struct mana_rnic_query_vf_cntrs_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS, + sizeof(req), sizeof(resp)); + req.hdr.dev_id = mdev->gdma_dev->dev_id; + req.adapter = mdev->adapter_handle; + + err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req, + sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d", + err); + return err; + } + + stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout; + stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak; + stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak; + stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak; + stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos; + stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request; + stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] = + resp.requester_implicit_nak; + stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] = + resp.requester_readresp_psn_mismatch; + stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req; + stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err; + stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err; + stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read; + stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] = + resp.responder_local_len_err; + 
stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] = + resp.requestor_local_prot_err; + stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] = + resp.responder_rem_access_err; + stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] = + resp.responder_local_qp_err; + stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] = + resp.responder_malformed_wqe; + stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err; + stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] = + resp.requester_rnr_nak_retries_exceeded; + stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] = + resp.requester_retries_exceeded; + stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err; + + stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps; + stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested; + stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events; + stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered; + stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate; + + return ARRAY_SIZE(mana_ib_port_stats_desc); +} diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h new file mode 100644 index 000000000000..7ff92d27f6c3 --- /dev/null +++ b/drivers/infiniband/hw/mana/counters.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Microsoft Corporation. All rights reserved. + */ + +#ifndef _COUNTERS_H_ +#define _COUNTERS_H_ + +#include "mana_ib.h" + +enum mana_ib_port_counters { + MANA_IB_REQUESTER_TIMEOUT, + MANA_IB_REQUESTER_OOS_NAK, + MANA_IB_REQUESTER_RNR_NAK, + MANA_IB_RESPONDER_RNR_NAK, + MANA_IB_RESPONDER_OOS, + MANA_IB_RESPONDER_DUP_REQUEST, + MANA_IB_REQUESTER_IMPLICIT_NAK, + MANA_IB_REQUESTER_READRESP_PSN_MISMATCH, + MANA_IB_NAK_INV_REQ, + MANA_IB_NAK_ACCESS_ERR, + MANA_IB_NAK_OPP_ERR, + MANA_IB_NAK_INV_READ, + MANA_IB_RESPONDER_LOCAL_LEN_ERR, + MANA_IB_REQUESTOR_LOCAL_PROT_ERR, + MANA_IB_RESPONDER_REM_ACCESS_ERR, + MANA_IB_RESPONDER_LOCAL_QP_ERR, + MANA_IB_RESPONDER_MALFORMED_WQE, + MANA_IB_GENERAL_HW_ERR, + MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED, + MANA_IB_REQUESTER_RETRIES_EXCEEDED, + MANA_IB_TOTAL_FATAL_ERR, + MANA_IB_RECEIVED_CNPS, + MANA_IB_NUM_QPS_CONGESTED, + MANA_IB_RATE_INC_EVENTS, + MANA_IB_NUM_QPS_RECOVERED, + MANA_IB_CURRENT_RATE, +}; + +struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev, + u32 port_num); +int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u32 port_num, int index); +#endif /* _COUNTERS_H_ */ diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c index f04a679d2871..0fc4e2679218 100644 --- a/drivers/infiniband/hw/mana/cq.c +++ b/drivers/infiniband/hw/mana/cq.c @@ -15,42 +15,58 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_device *ibdev = ibcq->device; struct mana_ib_create_cq ucmd = {}; struct mana_ib_dev *mdev; + struct gdma_context *gc; bool is_rnic_cq; u32 doorbell; + u32 buf_size; int err; mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev_to_gc(mdev); cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors; cq->cq_handle = INVALID_MANA_HANDLE; - if (udata->inlen < offsetof(struct mana_ib_create_cq, flags)) - return -EINVAL; + if (udata) { + if (udata->inlen < offsetof(struct mana_ib_create_cq, flags)) + return -EINVAL; - err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); - if (err) { - ibdev_dbg(ibdev, - "Failed to copy from udata for create cq, %d\n", err); - return err; - } + err = ib_copy_from_udata(&ucmd, udata, 
min(sizeof(ucmd), udata->inlen)); + if (err) { + ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err); + return err; + } - is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ); + is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ); - if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) { - ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe); - return -EINVAL; - } + if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) || + attr->cqe > U32_MAX / COMP_ENTRY_SIZE) { + ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe); + return -EINVAL; + } - cq->cqe = attr->cqe; - err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue); - if (err) { - ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err); - return err; - } + cq->cqe = attr->cqe; + err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, + &cq->queue); + if (err) { + ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err); + return err; + } - mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, - ibucontext); - doorbell = mana_ucontext->doorbell; + mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, + ibucontext); + doorbell = mana_ucontext->doorbell; + } else { + is_rnic_cq = true; + buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE)); + cq->cqe = buf_size / COMP_ENTRY_SIZE; + err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue); + if (err) { + ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err); + return err; + } + doorbell = gc->mana_ib.doorbell; + } if (is_rnic_cq) { err = mana_ib_gd_create_cq(mdev, cq, doorbell); @@ -66,13 +82,19 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } } - resp.cqid = cq->queue.id; - err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); - if (err) { - ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err); - goto err_remove_cq_cb; + if (udata) { + resp.cqid = cq->queue.id; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err); + goto err_remove_cq_cb; + } } + spin_lock_init(&cq->cq_lock); + INIT_LIST_HEAD(&cq->list_send_qp); + INIT_LIST_HEAD(&cq->list_recv_qp); + return 0; err_remove_cq_cb: @@ -122,7 +144,10 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) return -EINVAL; /* Create CQ table entry */ WARN_ON(gc->cq_table[cq->queue.id]); - gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL); + if (cq->queue.kmem) + gdma_cq = cq->queue.kmem; + else + gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL); if (!gdma_cq) return -ENOMEM; @@ -141,6 +166,153 @@ void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq) if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID) return; + if (cq->queue.kmem) + /* Then it will be cleaned and removed by the mana */ + return; + kfree(gc->cq_table[cq->queue.id]); gc->cq_table[cq->queue.id] = NULL; } + +int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); + struct gdma_queue *gdma_cq = cq->queue.kmem; + + if (!gdma_cq) + return -EINVAL; + + mana_gd_ring_cq(gdma_cq, SET_ARM_BIT); + return 0; +} + +static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe) +{ + struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data; + struct 
gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem; + struct ud_sq_shadow_wqe *shadow_wqe; + + shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq); + if (!shadow_wqe) + return; + + shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error; + + wq->tail += shadow_wqe->header.posted_wqe_size; + shadow_queue_advance_next_to_complete(&qp->shadow_sq); +} + +static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe) +{ + struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data; + struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem; + struct ud_rq_shadow_wqe *shadow_wqe; + + shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq); + if (!shadow_wqe) + return; + + shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len; + shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn; + shadow_wqe->header.error_code = IB_WC_SUCCESS; + + wq->tail += shadow_wqe->header.posted_wqe_size; + shadow_queue_advance_next_to_complete(&qp->shadow_rq); +} + +static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe) +{ + struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq); + + if (!qp) + return; + + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) { + if (cqe->is_sq) + handle_ud_sq_cqe(qp, cqe); + else + handle_ud_rq_cqe(qp, cqe); + } + + mana_put_qp_ref(qp); +} + +static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc, + const struct shadow_wqe_header *shadow_wqe) +{ + const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe; + + wc->wr_id = shadow_wqe->wr_id; + wc->status = shadow_wqe->error_code; + wc->opcode = shadow_wqe->opcode; + wc->vendor_err = shadow_wqe->error_code; + wc->wc_flags = 0; + wc->qp = &qp->ibqp; + wc->pkey_index = 0; + + if (shadow_wqe->opcode == IB_WC_RECV) { + wc->byte_len = ud_wqe->byte_len; + wc->src_qp = ud_wqe->src_qpn; + wc->wc_flags |= IB_WC_GRH; + } +} + +static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc) +{ + struct shadow_wqe_header *shadow_wqe; + struct mana_ib_qp *qp; + int wc_index = 0; + + /* process send shadow queue completions */ + list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { + while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq)) + != NULL) { + if (wc_index >= nwc) + goto out; + + fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe); + shadow_queue_advance_consumer(&qp->shadow_sq); + wc_index++; + } + } + + /* process recv shadow queue completions */ + list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { + while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq)) + != NULL) { + if (wc_index >= nwc) + goto out; + + fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe); + shadow_queue_advance_consumer(&qp->shadow_rq); + wc_index++; + } + } + +out: + return wc_index; +} + +int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); + struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev); + struct gdma_queue *queue = cq->queue.kmem; + struct gdma_comp gdma_cqe; + unsigned long flags; + int num_polled = 0; + int comp_read, i; + + spin_lock_irqsave(&cq->cq_lock, flags); + for (i = 0; i < num_entries; i++) { + comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1); + if (comp_read < 1) + break; + mana_handle_cqe(mdev, &gdma_cqe); + } + + num_polled = mana_process_completions(cq, num_entries, wc); + 
spin_unlock_irqrestore(&cq->cq_lock, flags); + + return num_polled; +} diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index 3416a85f8738..b31089320aa5 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -19,6 +19,7 @@ static const struct ib_device_ops mana_ib_dev_ops = { .add_gid = mana_ib_gd_add_gid, .alloc_pd = mana_ib_alloc_pd, .alloc_ucontext = mana_ib_alloc_ucontext, + .create_ah = mana_ib_create_ah, .create_cq = mana_ib_create_cq, .create_qp = mana_ib_create_qp, .create_rwq_ind_table = mana_ib_create_rwq_ind_table, @@ -27,22 +28,30 @@ static const struct ib_device_ops mana_ib_dev_ops = { .dealloc_ucontext = mana_ib_dealloc_ucontext, .del_gid = mana_ib_gd_del_gid, .dereg_mr = mana_ib_dereg_mr, + .destroy_ah = mana_ib_destroy_ah, .destroy_cq = mana_ib_destroy_cq, .destroy_qp = mana_ib_destroy_qp, .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table, .destroy_wq = mana_ib_destroy_wq, .disassociate_ucontext = mana_ib_disassociate_ucontext, + .get_dma_mr = mana_ib_get_dma_mr, .get_link_layer = mana_ib_get_link_layer, .get_port_immutable = mana_ib_get_port_immutable, .mmap = mana_ib_mmap, .modify_qp = mana_ib_modify_qp, .modify_wq = mana_ib_modify_wq, + .poll_cq = mana_ib_poll_cq, + .post_recv = mana_ib_post_recv, + .post_send = mana_ib_post_send, .query_device = mana_ib_query_device, .query_gid = mana_ib_query_gid, .query_pkey = mana_ib_query_pkey, .query_port = mana_ib_query_port, .reg_user_mr = mana_ib_reg_user_mr, + .reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf, + .req_notify_cq = mana_ib_arm_cq, + INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp), @@ -51,6 +60,43 @@ static const struct ib_device_ops mana_ib_dev_ops = { ib_ind_table), }; +static const struct ib_device_ops mana_ib_stats_ops = { + .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats, + .get_hw_stats = mana_ib_get_hw_stats, +}; + +static int mana_ib_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb); + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + struct gdma_context *gc = dev->gdma_dev->gdma_context; + struct mana_context *mc = gc->mana.driver_data; + struct net_device *ndev; + + /* Only process events from our parent device */ + if (event_dev != mc->ports[0]) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGEUPPER: + ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); + /* + * RDMA core will setup GID based on updated netdev. + * It's not possible to race with the core as rtnl lock is being + * held. 
+ */ + ib_device_set_netdev(&dev->ib_dev, ndev, 1); + + /* mana_get_primary_netdev() returns ndev with refcount held */ + netdev_put(ndev, &dev->dev_tracker); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + static int mana_ib_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { @@ -84,10 +130,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues; dev->ib_dev.dev.parent = mdev->gdma_context->dev; - rcu_read_lock(); /* required to get primary netdev */ - ndev = mana_get_primary_netdev_rcu(mc, 0); + ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker); if (!ndev) { - rcu_read_unlock(); ret = -ENODEV; ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1"); goto free_ib_device; @@ -95,7 +139,8 @@ static int mana_ib_probe(struct auxiliary_device *adev, ether_addr_copy(mac_addr, ndev->dev_addr); addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr); ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1); - rcu_read_unlock(); + /* mana_get_primary_netdev() returns ndev with refcount held */ + netdev_put(ndev, &dev->dev_tracker); if (ret) { ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret); goto free_ib_device; @@ -109,17 +154,27 @@ static int mana_ib_probe(struct auxiliary_device *adev, } dev->gdma_dev = &mdev->gdma_context->mana_ib; + dev->nb.notifier_call = mana_ib_netdev_event; + ret = register_netdevice_notifier(&dev->nb); + if (ret) { + ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d", + ret); + goto deregister_device; + } + ret = mana_ib_gd_query_adapter_caps(dev); if (ret) { ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret); - goto deregister_device; + goto deregister_net_notifier; } + ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops); + ret = mana_ib_create_eqs(dev); if (ret) { ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret); - goto deregister_device; + goto deregister_net_notifier; } ret = mana_ib_gd_create_rnic_adapter(dev); @@ -134,20 +189,31 @@ static int mana_ib_probe(struct auxiliary_device *adev, goto destroy_rnic; } + dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev, + MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0); + if (!dev->av_pool) { + ret = -ENOMEM; + goto destroy_rnic; + } + ret = ib_register_device(&dev->ib_dev, "mana_%d", mdev->gdma_context->dev); if (ret) - goto destroy_rnic; + goto deallocate_pool; dev_set_drvdata(&adev->dev, dev); return 0; +deallocate_pool: + dma_pool_destroy(dev->av_pool); destroy_rnic: xa_destroy(&dev->qp_table_wq); mana_ib_gd_destroy_rnic_adapter(dev); destroy_eqs: mana_ib_destroy_eqs(dev); +deregister_net_notifier: + unregister_netdevice_notifier(&dev->nb); deregister_device: mana_gd_deregister_device(dev->gdma_dev); free_ib_device: @@ -160,9 +226,11 @@ static void mana_ib_remove(struct auxiliary_device *adev) struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); ib_unregister_device(&dev->ib_dev); + dma_pool_destroy(dev->av_pool); xa_destroy(&dev->qp_table_wq); mana_ib_gd_destroy_rnic_adapter(dev); mana_ib_destroy_eqs(dev); + unregister_netdevice_notifier(&dev->nb); mana_gd_deregister_device(dev->gdma_dev); ib_dealloc_device(&dev->ib_dev); } diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 457cea6d9909..eda9c5b971de 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -82,6 +82,9 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) 
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req), sizeof(resp)); + if (!udata) + flags |= GDMA_PD_FLAG_ALLOW_GPA_MR; + req.flags = flags; err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); @@ -237,6 +240,27 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret); } +int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type, + struct mana_ib_queue *queue) +{ + struct gdma_context *gc = mdev_to_gc(mdev); + struct gdma_queue_spec spec = {}; + int err; + + queue->id = INVALID_QUEUE_ID; + queue->gdma_region = GDMA_INVALID_DMA_REGION; + spec.type = type; + spec.monitor_avl_buf = false; + spec.queue_size = size; + err = mana_gd_create_mana_wq_cq(&gc->mana_ib, &spec, &queue->kmem); + if (err) + return err; + /* take ownership into mana_ib from mana */ + queue->gdma_region = queue->kmem->mem_info.dma_region_handle; + queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; + return 0; +} + int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size, struct mana_ib_queue *queue) { @@ -276,6 +300,8 @@ void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue */ mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region); ib_umem_release(queue->umem); + if (queue->kmem) + mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem); } static int @@ -358,7 +384,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem unsigned int tail = 0; u64 *page_addr_list; void *request_buf; - int err; + int err = 0; gc = mdev_to_gc(dev); hwc = gc->hwc.driver_data; @@ -535,8 +561,10 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; - if (port_num == 1) + if (port_num == 1) { immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + } return 0; } @@ -595,8 +623,11 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port, props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_EDR; props->pkey_tbl_len = 1; - if (port == 1) + if (port == 1) { props->gid_tbl_len = 16; + props->port_cap_flags = IB_PORT_CM_SUP; + props->ip_gids = true; + } return 0; } @@ -634,7 +665,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req), sizeof(resp)); - req.hdr.resp.msg_version = GDMA_MESSAGE_V3; + req.hdr.resp.msg_version = GDMA_MESSAGE_V4; req.hdr.dev_id = dev->gdma_dev->dev_id; err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), @@ -663,6 +694,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) caps->max_inline_data_size = resp.max_inline_data_size; caps->max_send_sge_count = resp.max_send_sge_count; caps->max_recv_sge_count = resp.max_recv_sge_count; + caps->feature_flags = resp.feature_flags; return 0; } @@ -678,7 +710,7 @@ mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event) switch (event->type) { case GDMA_EQE_RNIC_QP_FATAL: qpn = event->details[0]; - qp = mana_get_qp_ref(mdev, qpn); + qp = mana_get_qp_ref(mdev, qpn, false); if (!qp) break; if (qp->ibqp.event_handler) { @@ -762,6 +794,9 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev) req.hdr.dev_id = gc->mana_ib.dev_id; req.notify_eq_id = mdev->fatal_err_eq->id; + if (mdev->adapter_caps.feature_flags & 
MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT) + req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST; + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); if (err) { ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err); @@ -987,3 +1022,61 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) } return 0; } + +int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp, + struct ib_qp_init_attr *attr, u32 doorbell, u32 type) +{ + struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq); + struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq); + struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd); + struct gdma_context *gc = mdev_to_gc(mdev); + struct mana_rnic_create_udqp_resp resp = {}; + struct mana_rnic_create_udqp_req req = {}; + int err, i; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + req.pd_handle = pd->pd_handle; + req.send_cq_handle = send_cq->cq_handle; + req.recv_cq_handle = recv_cq->cq_handle; + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) + req.dma_region[i] = qp->ud_qp.queues[i].gdma_region; + req.doorbell_page = doorbell; + req.max_send_wr = attr->cap.max_send_wr; + req.max_recv_wr = attr->cap.max_recv_wr; + req.max_send_sge = attr->cap.max_send_sge; + req.max_recv_sge = attr->cap.max_recv_sge; + req.qp_type = type; + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err); + return err; + } + qp->qp_handle = resp.qp_handle; + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) { + qp->ud_qp.queues[i].id = resp.queue_ids[i]; + /* The GDMA regions are now owned by the RNIC QP handle */ + qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION; + } + return 0; +} + +int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + struct mana_rnic_destroy_udqp_resp resp = {0}; + struct mana_rnic_destroy_udqp_req req = {0}; + struct gdma_context *gc = mdev_to_gc(mdev); + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp)); + req.hdr.dev_id = gc->mana_ib.dev_id; + req.adapter = mdev->adapter_handle; + req.qp_handle = qp->qp_handle; + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err); + return err; + } + return 0; +} diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index b53a5b4de908..6903946677e5 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -11,8 +11,11 @@ #include <rdma/ib_umem.h> #include <rdma/mana-abi.h> #include <rdma/uverbs_ioctl.h> +#include <linux/dmapool.h> #include <net/mana/mana.h> +#include "shadow_queue.h" +#include "counters.h" #define PAGE_SZ_BM \ (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \ @@ -21,6 +24,9 @@ /* MANA doesn't have any limit for MR size */ #define MANA_IB_MAX_MR_SIZE U64_MAX +/* Send queue ID mask */ +#define MANA_SENDQ_MASK BIT(31) + /* * The hardware limit of number of MRs is greater than maximum number of MRs * that can possibly represent in 24 bits @@ -32,6 +38,11 @@ */ #define MANA_CA_ACK_DELAY 16 +/* + * The buffer used for writing AV + */ +#define MANA_AV_BUFFER_SIZE 64 + struct mana_ib_adapter_caps { u32 
max_sq_id; u32 max_rq_id; @@ -48,10 +59,12 @@ struct mana_ib_adapter_caps { u32 max_send_sge_count; u32 max_recv_sge_count; u32 max_inline_data_size; + u64 feature_flags; }; struct mana_ib_queue { struct ib_umem *umem; + struct gdma_queue *kmem; u64 gdma_region; u64 id; }; @@ -64,6 +77,9 @@ struct mana_ib_dev { struct gdma_queue **eqs; struct xarray qp_table_wq; struct mana_ib_adapter_caps adapter_caps; + struct dma_pool *av_pool; + netdevice_tracker dev_tracker; + struct notifier_block nb; }; struct mana_ib_wq { @@ -87,6 +103,25 @@ struct mana_ib_pd { u32 tx_vp_offset; }; +struct mana_ib_av { + u8 dest_ip[16]; + u8 dest_mac[ETH_ALEN]; + u16 udp_src_port; + u8 src_ip[16]; + u32 hop_limit : 8; + u32 reserved1 : 12; + u32 dscp : 6; + u32 reserved2 : 5; + u32 is_ipv6 : 1; + u32 reserved3 : 32; +}; + +struct mana_ib_ah { + struct ib_ah ibah; + struct mana_ib_av *av; + dma_addr_t dma_handle; +}; + struct mana_ib_mr { struct ib_mr ibmr; struct ib_umem *umem; @@ -96,6 +131,10 @@ struct mana_ib_mr { struct mana_ib_cq { struct ib_cq ibcq; struct mana_ib_queue queue; + /* protects CQ polling */ + spinlock_t cq_lock; + struct list_head list_send_qp; + struct list_head list_recv_qp; int cqe; u32 comp_vector; mana_handle_t cq_handle; @@ -114,6 +153,17 @@ struct mana_ib_rc_qp { struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX]; }; +enum mana_ud_queue_type { + MANA_UD_SEND_QUEUE = 0, + MANA_UD_RECV_QUEUE, + MANA_UD_QUEUE_TYPE_MAX, +}; + +struct mana_ib_ud_qp { + struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX]; + u32 sq_psn; +}; + struct mana_ib_qp { struct ib_qp ibqp; @@ -121,11 +171,17 @@ struct mana_ib_qp { union { struct mana_ib_queue raw_sq; struct mana_ib_rc_qp rc_qp; + struct mana_ib_ud_qp ud_qp; }; /* The port on the IB device, starting with 1 */ u32 port; + struct list_head cq_send_list; + struct list_head cq_recv_list; + struct shadow_queue shadow_rq; + struct shadow_queue shadow_sq; + refcount_t refcount; struct completion free; }; @@ -145,17 +201,24 @@ enum mana_ib_command_code { MANA_IB_DESTROY_ADAPTER = 0x30003, MANA_IB_CONFIG_IP_ADDR = 0x30004, MANA_IB_CONFIG_MAC_ADDR = 0x30005, + MANA_IB_CREATE_UD_QP = 0x30006, + MANA_IB_DESTROY_UD_QP = 0x30007, MANA_IB_CREATE_CQ = 0x30008, MANA_IB_DESTROY_CQ = 0x30009, MANA_IB_CREATE_RC_QP = 0x3000a, MANA_IB_DESTROY_RC_QP = 0x3000b, MANA_IB_SET_QP_STATE = 0x3000d, + MANA_IB_QUERY_VF_COUNTERS = 0x30022, }; struct mana_ib_query_adapter_caps_req { struct gdma_req_hdr hdr; }; /*HW Data */ +enum mana_ib_adapter_features { + MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4), +}; + struct mana_ib_query_adapter_caps_resp { struct gdma_resp_hdr hdr; u32 max_sq_id; @@ -176,8 +239,13 @@ struct mana_ib_query_adapter_caps_resp { u32 max_send_sge_count; u32 max_recv_sge_count; u32 max_inline_data_size; + u64 feature_flags; }; /* HW Data */ +enum mana_ib_adapter_features_request { + MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1), +}; /*HW Data */ + struct mana_rnic_create_adapter_req { struct gdma_req_hdr hdr; u32 notify_eq_id; @@ -296,6 +364,37 @@ struct mana_rnic_destroy_rc_qp_resp { struct gdma_resp_hdr hdr; }; /* HW Data */ +struct mana_rnic_create_udqp_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; + mana_handle_t pd_handle; + mana_handle_t send_cq_handle; + mana_handle_t recv_cq_handle; + u64 dma_region[MANA_UD_QUEUE_TYPE_MAX]; + u32 qp_type; + u32 doorbell_page; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; +}; /* HW Data */ + +struct mana_rnic_create_udqp_resp { + struct gdma_resp_hdr hdr; + mana_handle_t 
qp_handle; + u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX]; +}; /* HW Data*/ + +struct mana_rnic_destroy_udqp_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; + mana_handle_t qp_handle; +}; /* HW Data */ + +struct mana_rnic_destroy_udqp_resp { + struct gdma_resp_hdr hdr; +}; /* HW Data */ + struct mana_ib_ah_attr { u8 src_addr[16]; u8 dest_addr[16]; @@ -332,17 +431,104 @@ struct mana_rnic_set_qp_state_resp { struct gdma_resp_hdr hdr; }; /* HW Data */ +enum WQE_OPCODE_TYPES { + WQE_TYPE_UD_SEND = 0, + WQE_TYPE_UD_RECV = 8, +}; /* HW DATA */ + +struct rdma_send_oob { + u32 wqe_type : 5; + u32 fence : 1; + u32 signaled : 1; + u32 solicited : 1; + u32 psn : 24; + + u32 ssn_or_rqpn : 24; + u32 reserved1 : 8; + union { + struct { + u32 remote_qkey; + u32 immediate; + u32 reserved1; + u32 reserved2; + } ud_send; + }; +}; /* HW DATA */ + +struct mana_rdma_cqe { + union { + struct { + u8 cqe_type; + u8 data[GDMA_COMP_DATA_SIZE - 1]; + }; + struct { + u32 cqe_type : 8; + u32 vendor_error : 9; + u32 reserved1 : 15; + u32 sge_offset : 5; + u32 tx_wqe_offset : 27; + } ud_send; + struct { + u32 cqe_type : 8; + u32 reserved1 : 24; + u32 msg_len; + u32 src_qpn : 24; + u32 reserved2 : 8; + u32 imm_data; + u32 rx_wqe_offset; + } ud_recv; + }; +}; /* HW DATA */ + +struct mana_rnic_query_vf_cntrs_req { + struct gdma_req_hdr hdr; + mana_handle_t adapter; +}; /* HW Data */ + +struct mana_rnic_query_vf_cntrs_resp { + struct gdma_resp_hdr hdr; + u64 requester_timeout; + u64 requester_oos_nak; + u64 requester_rnr_nak; + u64 responder_rnr_nak; + u64 responder_oos; + u64 responder_dup_request; + u64 requester_implicit_nak; + u64 requester_readresp_psn_mismatch; + u64 nak_inv_req; + u64 nak_access_err; + u64 nak_opp_err; + u64 nak_inv_read; + u64 responder_local_len_err; + u64 requestor_local_prot_err; + u64 responder_rem_access_err; + u64 responder_local_qp_err; + u64 responder_malformed_wqe; + u64 general_hw_err; + u64 requester_rnr_nak_retries_exceeded; + u64 requester_retries_exceeded; + u64 total_fatal_err; + u64 received_cnps; + u64 num_qps_congested; + u64 rate_inc_events; + u64 num_qps_recovered; + u64 current_rate; +}; /* HW Data */ + static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev) { return mdev->gdma_dev->gdma_context; } static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev, - uint32_t qid) + u32 qid, bool is_sq) { struct mana_ib_qp *qp; unsigned long flag; + if (is_sq) + qid |= MANA_SENDQ_MASK; + xa_lock_irqsave(&mdev->qp_table_wq, flag); qp = xa_load(&mdev->qp_table_wq, qid); if (qp) @@ -388,6 +574,8 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, mana_handle_t gdma_region); +int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type, + struct mana_ib_queue *queue); int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size, struct mana_ib_queue *queue); void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue); @@ -480,4 +668,24 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq); int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp, struct ib_qp_init_attr *attr, u32 doorbell, u64 flags); int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp); + +int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp, + struct ib_qp_init_attr *attr, u32 doorbell, u32 type); +int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev 
*mdev, struct mana_ib_qp *qp); + +int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); +int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags); + +int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); +int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); + +int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); + +struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length, + u64 iova, int fd, int mr_access_flags, + struct uverbs_attr_bundle *attrs); #endif diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c index 887b09dd86e7..f99557ec7767 100644 --- a/drivers/infiniband/hw/mana/mr.c +++ b/drivers/infiniband/hw/mana/mr.c @@ -8,6 +8,8 @@ #define VALID_MR_FLAGS \ (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ) +#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE) + static enum gdma_mr_access_flags mana_ib_verbs_to_gdma_access_flags(int access_flags) { @@ -39,6 +41,8 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr, req.mr_type = mr_params->mr_type; switch (mr_params->mr_type) { + case GDMA_MR_TYPE_GPA: + break; case GDMA_MR_TYPE_GVA: req.gva.dma_region_handle = mr_params->gva.dma_region_handle; req.gva.virtual_address = mr_params->gva.virtual_address; @@ -169,6 +173,107 @@ err_free: return ERR_PTR(err); } +struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length, + u64 iova, int fd, int access_flags, + struct uverbs_attr_bundle *attrs) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct gdma_create_mr_params mr_params = {}; + struct ib_device *ibdev = ibpd->device; + struct ib_umem_dmabuf *umem_dmabuf; + struct mana_ib_dev *dev; + struct mana_ib_mr *mr; + u64 dma_region_handle; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + access_flags &= ~IB_ACCESS_OPTIONAL; + if (access_flags & ~VALID_MR_FLAGS) + return ERR_PTR(-EOPNOTSUPP); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags); + if (IS_ERR(umem_dmabuf)) { + err = PTR_ERR(umem_dmabuf); + ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err); + goto err_free; + } + + mr->umem = &umem_dmabuf->umem; + + err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova); + if (err) { + ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n", + err); + goto err_umem; + } + + mr_params.pd_handle = pd->pd_handle; + mr_params.mr_type = GDMA_MR_TYPE_GVA; + mr_params.gva.dma_region_handle = dma_region_handle; + mr_params.gva.virtual_address = iova; + mr_params.gva.access_flags = + mana_ib_verbs_to_gdma_access_flags(access_flags); + + err = mana_ib_gd_create_mr(dev, mr, &mr_params); + if (err) + goto err_dma_region; + + /* + * There is no need to keep track of dma_region_handle after MR is + * successfully created. The dma_region_handle is tracked in the PF + * as part of the lifecycle of this MR. 
+ */ + + return &mr->ibmr; + +err_dma_region: + mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle); + +err_umem: + ib_umem_release(mr->umem); + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct gdma_create_mr_params mr_params = {}; + struct ib_device *ibdev = ibpd->device; + struct mana_ib_dev *dev; + struct mana_ib_mr *mr; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + if (access_flags & ~VALID_DMA_MR_FLAGS) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr_params.pd_handle = pd->pd_handle; + mr_params.mr_type = GDMA_MR_TYPE_GPA; + + err = mana_ib_gd_create_mr(dev, mr, &mr_params); + if (err) + goto err_free; + + return &mr->ibmr; + +err_free: + kfree(mr); + return ERR_PTR(err); +} + int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr); diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 73d67c853b6f..c928af58f38b 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -398,18 +398,128 @@ err_free_vport: return err; } +static u32 mana_ib_wqe_size(u32 sge, u32 oob_size) +{ + u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size; + + return ALIGN(wqe_size, GDMA_WQE_BU_SIZE); +} + +static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type) +{ + u32 queue_size; + + switch (attr->qp_type) { + case IB_QPT_UD: + case IB_QPT_GSI: + if (queue_type == MANA_UD_SEND_QUEUE) + queue_size = attr->cap.max_send_wr * + mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE); + else + queue_size = attr->cap.max_recv_wr * + mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE); + break; + default: + return 0; + } + + return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size)); +} + +static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type) +{ + enum gdma_queue_type type; + + switch (attr->qp_type) { + case IB_QPT_UD: + case IB_QPT_GSI: + if (queue_type == MANA_UD_SEND_QUEUE) + type = GDMA_SQ; + else + type = GDMA_RQ; + break; + default: + type = GDMA_INVALID_QUEUE; + } + return type; +} + +static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp, + GFP_KERNEL); +} + +static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num); +} + +static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK; + u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id; + int err; + + err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL); + if (err) + return err; + + err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL); + if (err) + goto remove_sq; + + return 0; + +remove_sq: + xa_erase_irq(&mdev->qp_table_wq, qids); + return err; +} + +static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK; + u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id; + + xa_erase_irq(&mdev->qp_table_wq, qids); + xa_erase_irq(&mdev->qp_table_wq, qidr); +} + static int 
mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) { refcount_set(&qp->refcount, 1); init_completion(&qp->free); - return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp, - GFP_KERNEL); + + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + return mana_table_store_rc_qp(mdev, qp); + case IB_QPT_UD: + case IB_QPT_GSI: + return mana_table_store_ud_qp(mdev, qp); + default: + ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n", + qp->ibqp.qp_type); + } + + return -EINVAL; } static void mana_table_remove_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) { - xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num); + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + mana_table_remove_rc_qp(mdev, qp); + break; + case IB_QPT_UD: + case IB_QPT_GSI: + mana_table_remove_ud_qp(mdev, qp); + break; + default: + ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n", + qp->ibqp.qp_type); + return; + } mana_put_qp_ref(qp); wait_for_completion(&qp->free); } @@ -490,6 +600,105 @@ destroy_queues: return err; } +static void mana_add_qp_to_cqs(struct mana_ib_qp *qp) +{ + struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq); + struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq); + unsigned long flags; + + spin_lock_irqsave(&send_cq->cq_lock, flags); + list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp); + spin_unlock_irqrestore(&send_cq->cq_lock, flags); + + spin_lock_irqsave(&recv_cq->cq_lock, flags); + list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp); + spin_unlock_irqrestore(&recv_cq->cq_lock, flags); +} + +static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp) +{ + struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq); + struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq); + unsigned long flags; + + spin_lock_irqsave(&send_cq->cq_lock, flags); + list_del(&qp->cq_send_list); + spin_unlock_irqrestore(&send_cq->cq_lock, flags); + + spin_lock_irqsave(&recv_cq->cq_lock, flags); + list_del(&qp->cq_recv_list); + spin_unlock_irqrestore(&recv_cq->cq_lock, flags); +} + +static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd, + struct ib_qp_init_attr *attr, struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev); + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + struct gdma_context *gc = mdev_to_gc(mdev); + u32 doorbell, queue_size; + int i, err; + + if (udata) { + ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n"); + return -EOPNOTSUPP; + } + + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) { + queue_size = mana_ib_queue_size(attr, i); + err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i), + &qp->ud_qp.queues[i]); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", + i, err); + goto destroy_queues; + } + } + doorbell = gc->mana_ib.doorbell; + + err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr, + sizeof(struct ud_rq_shadow_wqe)); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err); + goto destroy_queues; + } + err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr, + sizeof(struct ud_sq_shadow_wqe)); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err); + goto destroy_shadow_queues; + } + + err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type); + if (err) 
{ + ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err); + goto destroy_shadow_queues; + } + qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id; + qp->port = attr->port_num; + + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) + qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id; + + err = mana_table_store_qp(mdev, qp); + if (err) + goto destroy_qp; + + mana_add_qp_to_cqs(qp); + + return 0; + +destroy_qp: + mana_ib_gd_destroy_ud_qp(mdev, qp); +destroy_shadow_queues: + destroy_shadow_queue(&qp->shadow_rq); + destroy_shadow_queue(&qp->shadow_sq); +destroy_queues: + while (i-- > 0) + mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]); + return err; +} + int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, struct ib_udata *udata) { @@ -503,6 +712,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata); case IB_QPT_RC: return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata); + case IB_QPT_UD: + case IB_QPT_GSI: + return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata); default: ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n", attr->qp_type); @@ -579,6 +791,8 @@ int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, { switch (ibqp->qp_type) { case IB_QPT_RC: + case IB_QPT_UD: + case IB_QPT_GSI: return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata); default: ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type); @@ -652,6 +866,28 @@ static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata) return 0; } +static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = + container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + int i; + + mana_remove_qp_from_cqs(qp); + mana_table_remove_qp(mdev, qp); + + destroy_shadow_queue(&qp->shadow_rq); + destroy_shadow_queue(&qp->shadow_sq); + + /* Ignore return code as there is not much we can do about it. + * The error message is printed inside. + */ + mana_ib_gd_destroy_ud_qp(mdev, qp); + for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) + mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]); + + return 0; +} + int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); @@ -665,6 +901,9 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return mana_ib_destroy_qp_raw(qp, udata); case IB_QPT_RC: return mana_ib_destroy_rc_qp(qp, udata); + case IB_QPT_UD: + case IB_QPT_GSI: + return mana_ib_destroy_ud_qp(qp, udata); default: ibdev_dbg(ibqp->device, "Unexpected QP type %u\n", ibqp->qp_type); diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h new file mode 100644 index 000000000000..a4b3818f9c39 --- /dev/null +++ b/drivers/infiniband/hw/mana/shadow_queue.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2024, Microsoft Corporation. All rights reserved. 
+ */ + +#ifndef _MANA_SHADOW_QUEUE_H_ +#define _MANA_SHADOW_QUEUE_H_ + +struct shadow_wqe_header { + u16 opcode; + u16 error_code; + u32 posted_wqe_size; + u64 wr_id; +}; + +struct ud_rq_shadow_wqe { + struct shadow_wqe_header header; + u32 byte_len; + u32 src_qpn; +}; + +struct ud_sq_shadow_wqe { + struct shadow_wqe_header header; +}; + +struct shadow_queue { + /* Unmasked producer index, Incremented on wqe posting */ + u64 prod_idx; + /* Unmasked consumer index, Incremented on cq polling */ + u64 cons_idx; + /* Unmasked index of next-to-complete (from HW) shadow WQE */ + u64 next_to_complete_idx; + /* queue size in wqes */ + u32 length; + /* distance between elements in bytes */ + u32 stride; + /* ring buffer holding wqes */ + void *buffer; +}; + +static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride) +{ + queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL); + if (!queue->buffer) + return -ENOMEM; + + queue->length = length; + queue->stride = stride; + + return 0; +} + +static inline void destroy_shadow_queue(struct shadow_queue *queue) +{ + kvfree(queue->buffer); +} + +static inline bool shadow_queue_full(struct shadow_queue *queue) +{ + return (queue->prod_idx - queue->cons_idx) >= queue->length; +} + +static inline bool shadow_queue_empty(struct shadow_queue *queue) +{ + return queue->prod_idx == queue->cons_idx; +} + +static inline void * +shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index) +{ + u32 index = unmasked_index % queue->length; + + return ((u8 *)queue->buffer + index * queue->stride); +} + +static inline void * +shadow_queue_producer_entry(struct shadow_queue *queue) +{ + return shadow_queue_get_element(queue, queue->prod_idx); +} + +static inline void * +shadow_queue_get_next_to_consume(const struct shadow_queue *queue) +{ + if (queue->cons_idx == queue->next_to_complete_idx) + return NULL; + + return shadow_queue_get_element(queue, queue->cons_idx); +} + +static inline void * +shadow_queue_get_next_to_complete(struct shadow_queue *queue) +{ + if (queue->next_to_complete_idx == queue->prod_idx) + return NULL; + + return shadow_queue_get_element(queue, queue->next_to_complete_idx); +} + +static inline void shadow_queue_advance_producer(struct shadow_queue *queue) +{ + queue->prod_idx++; +} + +static inline void shadow_queue_advance_consumer(struct shadow_queue *queue) +{ + queue->cons_idx++; +} + +static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue) +{ + queue->next_to_complete_idx++; +} + +#endif diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c new file mode 100644 index 000000000000..1813567d3b16 --- /dev/null +++ b/drivers/infiniband/hw/mana/wr.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, Microsoft Corporation. All rights reserved. 
+ */ + +#include "mana_ib.h" + +#define MAX_WR_SGL_NUM (2) + +static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr) +{ + struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem; + struct gdma_posted_wqe_info wqe_info = {0}; + struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM]; + struct gdma_wqe_request wqe_req = {0}; + struct ud_rq_shadow_wqe *shadow_wqe; + int err, i; + + if (shadow_queue_full(&qp->shadow_rq)) + return -EINVAL; + + if (wr->num_sge > MAX_WR_SGL_NUM) + return -EINVAL; + + for (i = 0; i < wr->num_sge; ++i) { + gdma_sgl[i].address = wr->sg_list[i].addr; + gdma_sgl[i].mem_key = wr->sg_list[i].lkey; + gdma_sgl[i].size = wr->sg_list[i].length; + } + wqe_req.num_sge = wr->num_sge; + wqe_req.sgl = gdma_sgl; + + err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info); + if (err) + return err; + + shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq); + memset(shadow_wqe, 0, sizeof(*shadow_wqe)); + shadow_wqe->header.opcode = IB_WC_RECV; + shadow_wqe->header.wr_id = wr->wr_id; + shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu; + shadow_queue_advance_producer(&qp->shadow_rq); + + mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue); + return 0; +} + +int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + int err = 0; + + for (; wr; wr = wr->next) { + switch (ibqp->qp_type) { + case IB_QPT_UD: + case IB_QPT_GSI: + err = mana_ib_post_recv_ud(qp, wr); + if (unlikely(err)) { + *bad_wr = wr; + return err; + } + break; + default: + ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n", + ibqp->qp_type); + return -EINVAL; + } + } + + return err; +} + +static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr) +{ + struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah); + struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port); + struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem; + struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1]; + struct gdma_posted_wqe_info wqe_info = {0}; + struct gdma_wqe_request wqe_req = {0}; + struct rdma_send_oob send_oob = {0}; + struct ud_sq_shadow_wqe *shadow_wqe; + int err, i; + + if (!ndev) { + ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n", + qp->port, qp->ibqp.qp_num); + return -EINVAL; + } + + if (wr->wr.opcode != IB_WR_SEND) + return -EINVAL; + + if (shadow_queue_full(&qp->shadow_sq)) + return -EINVAL; + + if (wr->wr.num_sge > MAX_WR_SGL_NUM) + return -EINVAL; + + gdma_sgl[0].address = ah->dma_handle; + gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey; + gdma_sgl[0].size = sizeof(struct mana_ib_av); + for (i = 0; i < wr->wr.num_sge; ++i) { + gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr; + gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey; + gdma_sgl[i + 1].size = wr->wr.sg_list[i].length; + } + + wqe_req.num_sge = wr->wr.num_sge + 1; + wqe_req.sgl = gdma_sgl; + wqe_req.inline_oob_size = sizeof(struct rdma_send_oob); + wqe_req.inline_oob_data = &send_oob; + wqe_req.flags = GDMA_WR_OOB_IN_SGL; + wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu)); + + send_oob.wqe_type = WQE_TYPE_UD_SEND; + send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE); + send_oob.signaled = !!(wr->wr.send_flags & 
IB_SEND_SIGNALED); + send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED); + send_oob.psn = qp->ud_qp.sq_psn; + send_oob.ssn_or_rqpn = wr->remote_qpn; + send_oob.ud_send.remote_qkey = + qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey; + + err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info); + if (err) + return err; + + qp->ud_qp.sq_psn++; + shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq); + memset(shadow_wqe, 0, sizeof(*shadow_wqe)); + shadow_wqe->header.opcode = IB_WC_SEND; + shadow_wqe->header.wr_id = wr->wr.wr_id; + shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu; + shadow_queue_advance_producer(&qp->shadow_sq); + + mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue); + return 0; +} + +int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) +{ + int err; + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + + for (; wr; wr = wr->next) { + switch (ibqp->qp_type) { + case IB_QPT_UD: + case IB_QPT_GSI: + err = mana_ib_post_send_ud(qp, ud_wr(wr)); + if (unlikely(err)) { + *bad_wr = wr; + return err; + } + break; + default: + ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n", + ibqp->qp_type); + return -EINVAL; + } + } + + return err; +} |
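For context on how the new verbs are reached, the fragment below sketches how an in-kernel consumer could drive the UD send path added by this patch through the standard RDMA core entry point: ib_post_send() dispatches into mana_ib_post_send() via the ops table registered in device.c. The QP, AH and SGE setup and the completion handling are assumed to exist elsewhere; the helper name post_one_ud_send() and its arguments are illustrative only, not part of the patch.

/* Illustrative only: posting a single UD send on an already-created UD QP.
 * qp, ah and the SGE (addr/length/lkey) are assumed to have been set up by
 * the caller; this simply shows the ib_ud_wr layout that mana_ib_post_send()
 * consumes (opcode must be IB_WR_SEND, at most two data SGEs per WR).
 */
#include <rdma/ib_verbs.h>

static int post_one_ud_send(struct ib_qp *qp, struct ib_ah *ah,
			    struct ib_sge *sge, u32 remote_qpn,
			    u32 remote_qkey, u64 wr_id)
{
	const struct ib_send_wr *bad_wr;
	struct ib_ud_wr wr = {
		.wr = {
			.wr_id      = wr_id,
			.opcode     = IB_WR_SEND,
			.send_flags = IB_SEND_SIGNALED,
			.sg_list    = sge,
			.num_sge    = 1,
		},
		.ah          = ah,
		.remote_qpn  = remote_qpn,
		.remote_qkey = remote_qkey,
	};

	/* Dispatches to mana_ib_post_send() through the device ops table. */
	return ib_post_send(qp, &wr.wr, &bad_wr);
}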