Diffstat (limited to 'drivers/infiniband/hw/mana')
-rw-r--r--  drivers/infiniband/hw/mana/Makefile        |    2
-rw-r--r--  drivers/infiniband/hw/mana/ah.c            |   58
-rw-r--r--  drivers/infiniband/hw/mana/counters.c      |  105
-rw-r--r--  drivers/infiniband/hw/mana/counters.h      |   44
-rw-r--r--  drivers/infiniband/hw/mana/cq.c            |  228
-rw-r--r--  drivers/infiniband/hw/mana/device.c        |   82
-rw-r--r--  drivers/infiniband/hw/mana/main.c          |  103
-rw-r--r--  drivers/infiniband/hw/mana/mana_ib.h       |  210
-rw-r--r--  drivers/infiniband/hw/mana/mr.c            |  105
-rw-r--r--  drivers/infiniband/hw/mana/qp.c            |  245
-rw-r--r--  drivers/infiniband/hw/mana/shadow_queue.h  |  115
-rw-r--r--  drivers/infiniband/hw/mana/wr.c            |  168
12 files changed, 1420 insertions(+), 45 deletions(-)
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
index 88655fe5e398..921c05e08b11 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o
diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c
new file mode 100644
index 000000000000..f56952eebbaa
--- /dev/null
+++ b/drivers/infiniband/hw/mana/ah.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+ struct rdma_ah_attr *ah_attr = attr->ah_attr;
+ const struct ib_global_route *grh;
+ enum rdma_network_type ntype;
+
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE ||
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
+
+ if (udata)
+ return -EINVAL;
+
+ ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle);
+ if (!ah->av)
+ return -ENOMEM;
+
+ grh = rdma_ah_read_grh(ah_attr);
+ ntype = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN);
+ ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label);
+ ah->av->hop_limit = grh->hop_limit;
+ ah->av->dscp = (grh->traffic_class >> 2) & 0x3f;
+ ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6);
+
+ if (ah->av->is_ipv6) {
+ copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16);
+ copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16);
+ } else {
+ ah->av->dest_ip[10] = 0xFF;
+ ah->av->dest_ip[11] = 0xFF;
+ copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4);
+ copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4);
+ }
+
+ return 0;
+}
+
+int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+
+ dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle);
+
+ return 0;
+}
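
A side note on the address-vector packing above: the DSCP is the top six bits of the GRH traffic class, and an IPv4 destination GID is stored as an IPv4-mapped IPv6 address (::ffff:a.b.c.d) in the last bytes of dest_ip. A minimal userspace sketch of that arithmetic (standalone C with illustrative values; the driver additionally byte-reverses these fields with copy_in_reverse() for the hardware layout):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Illustrative userspace sketch only, not the kernel helpers. */
static unsigned int traffic_class_to_dscp(unsigned int tc)
{
	return (tc >> 2) & 0x3f;	/* DSCP is the top 6 bits of the traffic class */
}

int main(void)
{
	uint8_t dest_ip[16] = { 0 };
	const uint8_t v4[4] = { 192, 0, 2, 1 };	/* example IPv4 GID payload */

	/* IPv4 GIDs are written as IPv4-mapped IPv6: ::ffff:192.0.2.1 */
	dest_ip[10] = 0xff;
	dest_ip[11] = 0xff;
	memcpy(&dest_ip[12], v4, sizeof(v4));

	printf("dscp=%u mapped=%u.%u.%u.%u\n", traffic_class_to_dscp(0xb8),
	       dest_ip[12], dest_ip[13], dest_ip[14], dest_ip[15]);
	return 0;
}
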
diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c
new file mode 100644
index 000000000000..e533ce21013d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "counters.h"
+
+static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
+ [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
+ [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
+ [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
+ [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
+ [MANA_IB_RESPONDER_OOS].name = "responder_oos",
+ [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request",
+ [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak",
+ [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch",
+ [MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
+ [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
+ [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
+ [MANA_IB_NAK_INV_READ].name = "nak_inv_read",
+ [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error",
+ [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error",
+ [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error",
+ [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error",
+ [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe",
+ [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
+ [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded",
+ [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded",
+ [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
+ [MANA_IB_RECEIVED_CNPS].name = "received_cnps",
+ [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
+ [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
+ [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
+ [MANA_IB_CURRENT_RATE].name = "current_rate",
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
+ ARRAY_SIZE(mana_ib_port_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_vf_cntrs_resp resp = {};
+ struct mana_rnic_query_vf_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout;
+ stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
+ stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request;
+ stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
+ resp.requester_implicit_nak;
+ stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
+ resp.requester_readresp_psn_mismatch;
+ stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
+ stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
+ stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
+ stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
+ stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
+ resp.responder_local_len_err;
+ stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
+ resp.requestor_local_prot_err;
+ stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
+ resp.responder_rem_access_err;
+ stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
+ resp.responder_local_qp_err;
+ stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
+ resp.responder_malformed_wqe;
+ stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
+ resp.requester_rnr_nak_retries_exceeded;
+ stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
+ resp.requester_retries_exceeded;
+ stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
+
+ stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
+ stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested;
+ stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
+ stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered;
+ stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
+
+ return ARRAY_SIZE(mana_ib_port_stats_desc);
+}
diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h
new file mode 100644
index 000000000000..7ff92d27f6c3
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _COUNTERS_H_
+#define _COUNTERS_H_
+
+#include "mana_ib.h"
+
+enum mana_ib_port_counters {
+ MANA_IB_REQUESTER_TIMEOUT,
+ MANA_IB_REQUESTER_OOS_NAK,
+ MANA_IB_REQUESTER_RNR_NAK,
+ MANA_IB_RESPONDER_RNR_NAK,
+ MANA_IB_RESPONDER_OOS,
+ MANA_IB_RESPONDER_DUP_REQUEST,
+ MANA_IB_REQUESTER_IMPLICIT_NAK,
+ MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
+ MANA_IB_NAK_INV_REQ,
+ MANA_IB_NAK_ACCESS_ERR,
+ MANA_IB_NAK_OPP_ERR,
+ MANA_IB_NAK_INV_READ,
+ MANA_IB_RESPONDER_LOCAL_LEN_ERR,
+ MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
+ MANA_IB_RESPONDER_REM_ACCESS_ERR,
+ MANA_IB_RESPONDER_LOCAL_QP_ERR,
+ MANA_IB_RESPONDER_MALFORMED_WQE,
+ MANA_IB_GENERAL_HW_ERR,
+ MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
+ MANA_IB_REQUESTER_RETRIES_EXCEEDED,
+ MANA_IB_TOTAL_FATAL_ERR,
+ MANA_IB_RECEIVED_CNPS,
+ MANA_IB_NUM_QPS_CONGESTED,
+ MANA_IB_RATE_INC_EVENTS,
+ MANA_IB_NUM_QPS_RECOVERED,
+ MANA_IB_CURRENT_RATE,
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
+#endif /* _COUNTERS_H_ */
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index f04a679d2871..0fc4e2679218 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -15,42 +15,58 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_device *ibdev = ibcq->device;
struct mana_ib_create_cq ucmd = {};
struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
bool is_rnic_cq;
u32 doorbell;
+ u32 buf_size;
int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(mdev);
cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
cq->cq_handle = INVALID_MANA_HANDLE;
- if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
- return -EINVAL;
+ if (udata) {
+ if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+ return -EINVAL;
- err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
- if (err) {
- ibdev_dbg(ibdev,
- "Failed to copy from udata for create cq, %d\n", err);
- return err;
- }
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+ return err;
+ }
- is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
+ is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
- if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
- ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
- return -EINVAL;
- }
+ if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+ attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
+ ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+ return -EINVAL;
+ }
- cq->cqe = attr->cqe;
- err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
- if (err) {
- ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
- return err;
- }
+ cq->cqe = attr->cqe;
+ err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+ return err;
+ }
- mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
- ibucontext);
- doorbell = mana_ucontext->doorbell;
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ } else {
+ is_rnic_cq = true;
+ buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+ cq->cqe = buf_size / COMP_ENTRY_SIZE;
+ err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+ return err;
+ }
+ doorbell = gc->mana_ib.doorbell;
+ }
if (is_rnic_cq) {
err = mana_ib_gd_create_cq(mdev, cq, doorbell);
@@ -66,13 +82,19 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
}
- resp.cqid = cq->queue.id;
- err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
- if (err) {
- ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
- goto err_remove_cq_cb;
+ if (udata) {
+ resp.cqid = cq->queue.id;
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto err_remove_cq_cb;
+ }
}
+ spin_lock_init(&cq->cq_lock);
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
+
return 0;
err_remove_cq_cb:
@@ -122,7 +144,10 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
return -EINVAL;
/* Create CQ table entry */
WARN_ON(gc->cq_table[cq->queue.id]);
- gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (cq->queue.kmem)
+ gdma_cq = cq->queue.kmem;
+ else
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
if (!gdma_cq)
return -ENOMEM;
@@ -141,6 +166,153 @@ void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
return;
+ if (cq->queue.kmem)
+ /* Then it will be cleaned up and removed by the mana driver */
+ return;
+
kfree(gc->cq_table[cq->queue.id]);
gc->cq_table[cq->queue.id] = NULL;
}
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct gdma_queue *gdma_cq = cq->queue.kmem;
+
+ if (!gdma_cq)
+ return -EINVAL;
+
+ mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
+ return 0;
+}
+
+static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct ud_sq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+}
+
+static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct ud_rq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
+ shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
+ shadow_wqe->header.error_code = IB_WC_SUCCESS;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_rq);
+}
+
+static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
+
+ if (!qp)
+ return;
+
+ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
+ if (cqe->is_sq)
+ handle_ud_sq_cqe(qp, cqe);
+ else
+ handle_ud_rq_cqe(qp, cqe);
+ }
+
+ mana_put_qp_ref(qp);
+}
+
+static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
+ const struct shadow_wqe_header *shadow_wqe)
+{
+ const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
+
+ wc->wr_id = shadow_wqe->wr_id;
+ wc->status = shadow_wqe->error_code;
+ wc->opcode = shadow_wqe->opcode;
+ wc->vendor_err = shadow_wqe->error_code;
+ wc->wc_flags = 0;
+ wc->qp = &qp->ibqp;
+ wc->pkey_index = 0;
+
+ if (shadow_wqe->opcode == IB_WC_RECV) {
+ wc->byte_len = ud_wqe->byte_len;
+ wc->src_qp = ud_wqe->src_qpn;
+ wc->wc_flags |= IB_WC_GRH;
+ }
+}
+
+static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
+{
+ struct shadow_wqe_header *shadow_wqe;
+ struct mana_ib_qp *qp;
+ int wc_index = 0;
+
+ /* process send shadow queue completions */
+ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_sq);
+ wc_index++;
+ }
+ }
+
+ /* process recv shadow queue completions */
+ list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_rq);
+ wc_index++;
+ }
+ }
+
+out:
+ return wc_index;
+}
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = cq->queue.kmem;
+ struct gdma_comp gdma_cqe;
+ unsigned long flags;
+ int num_polled = 0;
+ int comp_read, i;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (i = 0; i < num_entries; i++) {
+ comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
+ if (comp_read < 1)
+ break;
+ mana_handle_cqe(mdev, &gdma_cqe);
+ }
+
+ num_polled = mana_process_completions(cq, num_entries, wc);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ return num_polled;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 3416a85f8738..b31089320aa5 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -19,6 +19,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.add_gid = mana_ib_gd_add_gid,
.alloc_pd = mana_ib_alloc_pd,
.alloc_ucontext = mana_ib_alloc_ucontext,
+ .create_ah = mana_ib_create_ah,
.create_cq = mana_ib_create_cq,
.create_qp = mana_ib_create_qp,
.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
@@ -27,22 +28,30 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.dealloc_ucontext = mana_ib_dealloc_ucontext,
.del_gid = mana_ib_gd_del_gid,
.dereg_mr = mana_ib_dereg_mr,
+ .destroy_ah = mana_ib_destroy_ah,
.destroy_cq = mana_ib_destroy_cq,
.destroy_qp = mana_ib_destroy_qp,
.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
.destroy_wq = mana_ib_destroy_wq,
.disassociate_ucontext = mana_ib_disassociate_ucontext,
+ .get_dma_mr = mana_ib_get_dma_mr,
.get_link_layer = mana_ib_get_link_layer,
.get_port_immutable = mana_ib_get_port_immutable,
.mmap = mana_ib_mmap,
.modify_qp = mana_ib_modify_qp,
.modify_wq = mana_ib_modify_wq,
+ .poll_cq = mana_ib_poll_cq,
+ .post_recv = mana_ib_post_recv,
+ .post_send = mana_ib_post_send,
.query_device = mana_ib_query_device,
.query_gid = mana_ib_query_gid,
.query_pkey = mana_ib_query_pkey,
.query_port = mana_ib_query_port,
.reg_user_mr = mana_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf,
+ .req_notify_cq = mana_ib_arm_cq,
+ INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
@@ -51,6 +60,43 @@ static const struct ib_device_ops mana_ib_dev_ops = {
ib_ind_table),
};
+static const struct ib_device_ops mana_ib_stats_ops = {
+ .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
+ .get_hw_stats = mana_ib_get_hw_stats,
+};
+
+static int mana_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb);
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ struct gdma_context *gc = dev->gdma_dev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
+ struct net_device *ndev;
+
+ /* Only process events from our parent device */
+ if (event_dev != mc->ports[0])
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+ /*
+ * RDMA core will setup GID based on updated netdev.
+ * It's not possible to race with the core as rtnl lock is being
+ * held.
+ */
+ ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
static int mana_ib_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
@@ -84,10 +130,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
dev->ib_dev.dev.parent = mdev->gdma_context->dev;
- rcu_read_lock(); /* required to get primary netdev */
- ndev = mana_get_primary_netdev_rcu(mc, 0);
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
if (!ndev) {
- rcu_read_unlock();
ret = -ENODEV;
ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
goto free_ib_device;
@@ -95,7 +139,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
ether_addr_copy(mac_addr, ndev->dev_addr);
addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
- rcu_read_unlock();
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
goto free_ib_device;
@@ -109,17 +154,27 @@ static int mana_ib_probe(struct auxiliary_device *adev,
}
dev->gdma_dev = &mdev->gdma_context->mana_ib;
+ dev->nb.notifier_call = mana_ib_netdev_event;
+ ret = register_netdevice_notifier(&dev->nb);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
+ ret);
+ goto deregister_device;
+ }
+
ret = mana_ib_gd_query_adapter_caps(dev);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
ret);
- goto deregister_device;
+ goto deregister_net_notifier;
}
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+
ret = mana_ib_create_eqs(dev);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
- goto deregister_device;
+ goto deregister_net_notifier;
}
ret = mana_ib_gd_create_rnic_adapter(dev);
@@ -134,20 +189,31 @@ static int mana_ib_probe(struct auxiliary_device *adev,
goto destroy_rnic;
}
+ dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev,
+ MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0);
+ if (!dev->av_pool) {
+ ret = -ENOMEM;
+ goto destroy_rnic;
+ }
+
ret = ib_register_device(&dev->ib_dev, "mana_%d",
mdev->gdma_context->dev);
if (ret)
- goto destroy_rnic;
+ goto deallocate_pool;
dev_set_drvdata(&adev->dev, dev);
return 0;
+deallocate_pool:
+ dma_pool_destroy(dev->av_pool);
destroy_rnic:
xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
destroy_eqs:
mana_ib_destroy_eqs(dev);
+deregister_net_notifier:
+ unregister_netdevice_notifier(&dev->nb);
deregister_device:
mana_gd_deregister_device(dev->gdma_dev);
free_ib_device:
@@ -160,9 +226,11 @@ static void mana_ib_remove(struct auxiliary_device *adev)
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
ib_unregister_device(&dev->ib_dev);
+ dma_pool_destroy(dev->av_pool);
xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
mana_ib_destroy_eqs(dev);
+ unregister_netdevice_notifier(&dev->nb);
mana_gd_deregister_device(dev->gdma_dev);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 457cea6d9909..eda9c5b971de 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -82,6 +82,9 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
sizeof(resp));
+ if (!udata)
+ flags |= GDMA_PD_FLAG_ALLOW_GPA_MR;
+
req.flags = flags;
err = mana_gd_send_request(gc, sizeof(req), &req,
sizeof(resp), &resp);
@@ -237,6 +240,27 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
}
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue_spec spec = {};
+ int err;
+
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+ spec.type = type;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = size;
+ err = mana_gd_create_mana_wq_cq(&gc->mana_ib, &spec, &queue->kmem);
+ if (err)
+ return err;
+ /* take ownership into mana_ib from mana */
+ queue->gdma_region = queue->kmem->mem_info.dma_region_handle;
+ queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
+ return 0;
+}
+
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
struct mana_ib_queue *queue)
{
@@ -276,6 +300,8 @@ void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue
*/
mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
ib_umem_release(queue->umem);
+ if (queue->kmem)
+ mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem);
}
static int
@@ -358,7 +384,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem
unsigned int tail = 0;
u64 *page_addr_list;
void *request_buf;
- int err;
+ int err = 0;
gc = mdev_to_gc(dev);
hwc = gc->hwc.driver_data;
@@ -535,8 +561,10 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
- if (port_num == 1)
+ if (port_num == 1) {
immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ }
return 0;
}
@@ -595,8 +623,11 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
props->active_width = IB_WIDTH_4X;
props->active_speed = IB_SPEED_EDR;
props->pkey_tbl_len = 1;
- if (port == 1)
+ if (port == 1) {
props->gid_tbl_len = 16;
+ props->port_cap_flags = IB_PORT_CM_SUP;
+ props->ip_gids = true;
+ }
return 0;
}
@@ -634,7 +665,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
sizeof(resp));
- req.hdr.resp.msg_version = GDMA_MESSAGE_V3;
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V4;
req.hdr.dev_id = dev->gdma_dev->dev_id;
err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req),
@@ -663,6 +694,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
caps->max_inline_data_size = resp.max_inline_data_size;
caps->max_send_sge_count = resp.max_send_sge_count;
caps->max_recv_sge_count = resp.max_recv_sge_count;
+ caps->feature_flags = resp.feature_flags;
return 0;
}
@@ -678,7 +710,7 @@ mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
switch (event->type) {
case GDMA_EQE_RNIC_QP_FATAL:
qpn = event->details[0];
- qp = mana_get_qp_ref(mdev, qpn);
+ qp = mana_get_qp_ref(mdev, qpn, false);
if (!qp)
break;
if (qp->ibqp.event_handler) {
@@ -762,6 +794,9 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
req.hdr.dev_id = gc->mana_ib.dev_id;
req.notify_eq_id = mdev->fatal_err_eq->id;
+ if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
+ req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST;
+
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
if (err) {
ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err);
@@ -987,3 +1022,61 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
}
return 0;
}
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_udqp_resp resp = {};
+ struct mana_rnic_create_udqp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->ud_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.qp_type = type;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.qp_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) {
+ qp->ud_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_udqp_resp resp = {0};
+ struct mana_rnic_destroy_udqp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err);
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index b53a5b4de908..6903946677e5 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -11,8 +11,11 @@
#include <rdma/ib_umem.h>
#include <rdma/mana-abi.h>
#include <rdma/uverbs_ioctl.h>
+#include <linux/dmapool.h>
#include <net/mana/mana.h>
+#include "shadow_queue.h"
+#include "counters.h"
#define PAGE_SZ_BM \
(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
@@ -21,6 +24,9 @@
/* MANA doesn't have any limit for MR size */
#define MANA_IB_MAX_MR_SIZE U64_MAX
+/* Send queue ID mask */
+#define MANA_SENDQ_MASK BIT(31)
+
/*
* The hardware limit of number of MRs is greater than maximum number of MRs
* that can possibly represent in 24 bits
@@ -32,6 +38,11 @@
*/
#define MANA_CA_ACK_DELAY 16
+/*
+ * The buffer used for writing AV
+ */
+#define MANA_AV_BUFFER_SIZE 64
+
struct mana_ib_adapter_caps {
u32 max_sq_id;
u32 max_rq_id;
@@ -48,10 +59,12 @@ struct mana_ib_adapter_caps {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
};
struct mana_ib_queue {
struct ib_umem *umem;
+ struct gdma_queue *kmem;
u64 gdma_region;
u64 id;
};
@@ -64,6 +77,9 @@ struct mana_ib_dev {
struct gdma_queue **eqs;
struct xarray qp_table_wq;
struct mana_ib_adapter_caps adapter_caps;
+ struct dma_pool *av_pool;
+ netdevice_tracker dev_tracker;
+ struct notifier_block nb;
};
struct mana_ib_wq {
@@ -87,6 +103,25 @@ struct mana_ib_pd {
u32 tx_vp_offset;
};
+struct mana_ib_av {
+ u8 dest_ip[16];
+ u8 dest_mac[ETH_ALEN];
+ u16 udp_src_port;
+ u8 src_ip[16];
+ u32 hop_limit : 8;
+ u32 reserved1 : 12;
+ u32 dscp : 6;
+ u32 reserved2 : 5;
+ u32 is_ipv6 : 1;
+ u32 reserved3 : 32;
+};
+
+struct mana_ib_ah {
+ struct ib_ah ibah;
+ struct mana_ib_av *av;
+ dma_addr_t dma_handle;
+};
+
struct mana_ib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
@@ -96,6 +131,10 @@ struct mana_ib_mr {
struct mana_ib_cq {
struct ib_cq ibcq;
struct mana_ib_queue queue;
+ /* protects CQ polling */
+ spinlock_t cq_lock;
+ struct list_head list_send_qp;
+ struct list_head list_recv_qp;
int cqe;
u32 comp_vector;
mana_handle_t cq_handle;
@@ -114,6 +153,17 @@ struct mana_ib_rc_qp {
struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
};
+enum mana_ud_queue_type {
+ MANA_UD_SEND_QUEUE = 0,
+ MANA_UD_RECV_QUEUE,
+ MANA_UD_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_ud_qp {
+ struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX];
+ u32 sq_psn;
+};
+
struct mana_ib_qp {
struct ib_qp ibqp;
@@ -121,11 +171,17 @@ struct mana_ib_qp {
union {
struct mana_ib_queue raw_sq;
struct mana_ib_rc_qp rc_qp;
+ struct mana_ib_ud_qp ud_qp;
};
/* The port on the IB device, starting with 1 */
u32 port;
+ struct list_head cq_send_list;
+ struct list_head cq_recv_list;
+ struct shadow_queue shadow_rq;
+ struct shadow_queue shadow_sq;
+
refcount_t refcount;
struct completion free;
};
@@ -145,17 +201,24 @@ enum mana_ib_command_code {
MANA_IB_DESTROY_ADAPTER = 0x30003,
MANA_IB_CONFIG_IP_ADDR = 0x30004,
MANA_IB_CONFIG_MAC_ADDR = 0x30005,
+ MANA_IB_CREATE_UD_QP = 0x30006,
+ MANA_IB_DESTROY_UD_QP = 0x30007,
MANA_IB_CREATE_CQ = 0x30008,
MANA_IB_DESTROY_CQ = 0x30009,
MANA_IB_CREATE_RC_QP = 0x3000a,
MANA_IB_DESTROY_RC_QP = 0x3000b,
MANA_IB_SET_QP_STATE = 0x3000d,
+ MANA_IB_QUERY_VF_COUNTERS = 0x30022,
};
struct mana_ib_query_adapter_caps_req {
struct gdma_req_hdr hdr;
}; /*HW Data */
+enum mana_ib_adapter_features {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4),
+};
+
struct mana_ib_query_adapter_caps_resp {
struct gdma_resp_hdr hdr;
u32 max_sq_id;
@@ -176,8 +239,13 @@ struct mana_ib_query_adapter_caps_resp {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
}; /* HW Data */
+enum mana_ib_adapter_features_request {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1),
+}; /*HW Data */
+
struct mana_rnic_create_adapter_req {
struct gdma_req_hdr hdr;
u32 notify_eq_id;
@@ -296,6 +364,37 @@ struct mana_rnic_destroy_rc_qp_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+struct mana_rnic_create_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_UD_QUEUE_TYPE_MAX];
+ u32 qp_type;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t qp_handle;
+ u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX];
+}; /* HW Data*/
+
+struct mana_rnic_destroy_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
struct mana_ib_ah_attr {
u8 src_addr[16];
u8 dest_addr[16];
@@ -332,17 +431,104 @@ struct mana_rnic_set_qp_state_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+enum WQE_OPCODE_TYPES {
+ WQE_TYPE_UD_SEND = 0,
+ WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+ u32 wqe_type : 5;
+ u32 fence : 1;
+ u32 signaled : 1;
+ u32 solicited : 1;
+ u32 psn : 24;
+
+ u32 ssn_or_rqpn : 24;
+ u32 reserved1 : 8;
+ union {
+ struct {
+ u32 remote_qkey;
+ u32 immediate;
+ u32 reserved1;
+ u32 reserved2;
+ } ud_send;
+ };
+}; /* HW DATA */
+
+struct mana_rdma_cqe {
+ union {
+ struct {
+ u8 cqe_type;
+ u8 data[GDMA_COMP_DATA_SIZE - 1];
+ };
+ struct {
+ u32 cqe_type : 8;
+ u32 vendor_error : 9;
+ u32 reserved1 : 15;
+ u32 sge_offset : 5;
+ u32 tx_wqe_offset : 27;
+ } ud_send;
+ struct {
+ u32 cqe_type : 8;
+ u32 reserved1 : 24;
+ u32 msg_len;
+ u32 src_qpn : 24;
+ u32 reserved2 : 8;
+ u32 imm_data;
+ u32 rx_wqe_offset;
+ } ud_recv;
+ };
+}; /* HW DATA */
+
+struct mana_rnic_query_vf_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_vf_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u64 requester_timeout;
+ u64 requester_oos_nak;
+ u64 requester_rnr_nak;
+ u64 responder_rnr_nak;
+ u64 responder_oos;
+ u64 responder_dup_request;
+ u64 requester_implicit_nak;
+ u64 requester_readresp_psn_mismatch;
+ u64 nak_inv_req;
+ u64 nak_access_err;
+ u64 nak_opp_err;
+ u64 nak_inv_read;
+ u64 responder_local_len_err;
+ u64 requestor_local_prot_err;
+ u64 responder_rem_access_err;
+ u64 responder_local_qp_err;
+ u64 responder_malformed_wqe;
+ u64 general_hw_err;
+ u64 requester_rnr_nak_retries_exceeded;
+ u64 requester_retries_exceeded;
+ u64 total_fatal_err;
+ u64 received_cnps;
+ u64 num_qps_congested;
+ u64 rate_inc_events;
+ u64 num_qps_recovered;
+ u64 current_rate;
+}; /* HW Data */
+
static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
{
return mdev->gdma_dev->gdma_context;
}
static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
- uint32_t qid)
+ u32 qid, bool is_sq)
{
struct mana_ib_qp *qp;
unsigned long flag;
+ if (is_sq)
+ qid |= MANA_SENDQ_MASK;
+
xa_lock_irqsave(&mdev->qp_table_wq, flag);
qp = xa_load(&mdev->qp_table_wq, qid);
if (qp)
@@ -388,6 +574,8 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
mana_handle_t gdma_region);
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue);
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
struct mana_ib_queue *queue);
void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
@@ -480,4 +668,24 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type);
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int mr_access_flags,
+ struct uverbs_attr_bundle *attrs);
#endif
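
One detail of the header above worth spelling out: UD/GSI QPs are stored in qp_table_wq twice, keyed by the receive queue id and by the send queue id with MANA_SENDQ_MASK (bit 31) set, which is what lets mana_get_qp_ref() resolve a CQE from its wq_num and is_sq flag. A standalone sketch of the key computation (illustrative, not the kernel code):

#include <stdio.h>
#include <stdint.h>

#define MANA_SENDQ_MASK (1u << 31)	/* bit 31 marks send-queue entries */

/* Key used for the qp_table_wq xarray: receive queues are stored under
 * their raw queue id, send queues under (id | MANA_SENDQ_MASK).
 */
static uint32_t qp_table_key(uint32_t wq_num, int is_sq)
{
	return is_sq ? (wq_num | MANA_SENDQ_MASK) : wq_num;
}

int main(void)
{
	printf("recv key: 0x%08x\n", (unsigned int)qp_table_key(0x1234, 0));
	printf("send key: 0x%08x\n", (unsigned int)qp_table_key(0x1234, 1));
	return 0;
}
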
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 887b09dd86e7..f99557ec7767 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -8,6 +8,8 @@
#define VALID_MR_FLAGS \
(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
+#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
+
static enum gdma_mr_access_flags
mana_ib_verbs_to_gdma_access_flags(int access_flags)
{
@@ -39,6 +41,8 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
req.mr_type = mr_params->mr_type;
switch (mr_params->mr_type) {
+ case GDMA_MR_TYPE_GPA:
+ break;
case GDMA_MR_TYPE_GVA:
req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
req.gva.virtual_address = mr_params->gva.virtual_address;
@@ -169,6 +173,107 @@ err_free:
return ERR_PTR(err);
}
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int access_flags,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ u64 dma_region_handle;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~VALID_MR_FLAGS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err);
+ goto err_free;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+ err);
+ goto err_umem;
+ }
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GVA;
+ mr_params.gva.dma_region_handle = dma_region_handle;
+ mr_params.gva.virtual_address = iova;
+ mr_params.gva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_dma_region;
+
+ /*
+ * There is no need to keep track of dma_region_handle after MR is
+ * successfully created. The dma_region_handle is tracked in the PF
+ * as part of the lifecycle of this MR.
+ */
+
+ return &mr->ibmr;
+
+err_dma_region:
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ if (access_flags & ~VALID_DMA_MR_FLAGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GPA;
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_free;
+
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 73d67c853b6f..c928af58f38b 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -398,18 +398,128 @@ err_free_vport:
return err;
}
+static u32 mana_ib_wqe_size(u32 sge, u32 oob_size)
+{
+ u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size;
+
+ return ALIGN(wqe_size, GDMA_WQE_BU_SIZE);
+}
+
+static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ u32 queue_size;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ queue_size = attr->cap.max_send_wr *
+ mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE);
+ else
+ queue_size = attr->cap.max_recv_wr *
+ mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE);
+ break;
+ default:
+ return 0;
+ }
+
+ return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size));
+}
+
+static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ enum gdma_queue_type type;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ type = GDMA_SQ;
+ else
+ type = GDMA_RQ;
+ break;
+ default:
+ type = GDMA_INVALID_QUEUE;
+ }
+ return type;
+}
+
+static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+ GFP_KERNEL);
+}
+
+static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+}
+
+static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ int err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL);
+ if (err)
+ return err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL);
+ if (err)
+ goto remove_sq;
+
+ return 0;
+
+remove_sq:
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ return err;
+}
+
+static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ xa_erase_irq(&mdev->qp_table_wq, qidr);
+}
+
static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
refcount_set(&qp->refcount, 1);
init_completion(&qp->free);
- return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
- GFP_KERNEL);
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ return mana_table_store_rc_qp(mdev, qp);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_table_store_ud_qp(mdev, qp);
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n",
+ qp->ibqp.qp_type);
+ }
+
+ return -EINVAL;
}
static void mana_table_remove_qp(struct mana_ib_dev *mdev,
struct mana_ib_qp *qp)
{
- xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ mana_table_remove_rc_qp(mdev, qp);
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ mana_table_remove_ud_qp(mdev, qp);
+ break;
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n",
+ qp->ibqp.qp_type);
+ return;
+ }
mana_put_qp_ref(qp);
wait_for_completion(&qp->free);
}
@@ -490,6 +600,105 @@ destroy_queues:
return err;
}
+static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_del(&qp->cq_send_list);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_del(&qp->cq_recv_list);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ u32 doorbell, queue_size;
+ int i, err;
+
+ if (udata) {
+ ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) {
+ queue_size = mana_ib_queue_size(attr, i);
+ err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i),
+ &qp->ud_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n",
+ i, err);
+ goto destroy_queues;
+ }
+ }
+ doorbell = gc->mana_ib.doorbell;
+
+ err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+ sizeof(struct ud_rq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
+ goto destroy_queues;
+ }
+ err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+ sizeof(struct ud_sq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
+ goto destroy_shadow_queues;
+ }
+
+ err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err);
+ goto destroy_shadow_queues;
+ }
+ qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ qp->port = attr->port_num;
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id;
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ mana_add_qp_to_cqs(qp);
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+destroy_shadow_queues:
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+ return err;
+}
+
int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
struct ib_udata *udata)
{
@@ -503,6 +712,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
case IB_QPT_RC:
return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata);
default:
ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
attr->qp_type);
@@ -579,6 +791,8 @@ int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{
switch (ibqp->qp_type) {
case IB_QPT_RC:
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
default:
ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
@@ -652,6 +866,28 @@ static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
return 0;
}
+static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_remove_qp_from_cqs(qp);
+ mana_table_remove_qp(mdev, qp);
+
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+
+ return 0;
+}
+
int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
@@ -665,6 +901,9 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
return mana_ib_destroy_qp_raw(qp, udata);
case IB_QPT_RC:
return mana_ib_destroy_rc_qp(qp, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_destroy_ud_qp(qp, udata);
default:
ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
ibqp->qp_type);
diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h
new file mode 100644
index 000000000000..a4b3818f9c39
--- /dev/null
+++ b/drivers/infiniband/hw/mana/shadow_queue.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_SHADOW_QUEUE_H_
+#define _MANA_SHADOW_QUEUE_H_
+
+struct shadow_wqe_header {
+ u16 opcode;
+ u16 error_code;
+ u32 posted_wqe_size;
+ u64 wr_id;
+};
+
+struct ud_rq_shadow_wqe {
+ struct shadow_wqe_header header;
+ u32 byte_len;
+ u32 src_qpn;
+};
+
+struct ud_sq_shadow_wqe {
+ struct shadow_wqe_header header;
+};
+
+struct shadow_queue {
+ /* Unmasked producer index, Incremented on wqe posting */
+ u64 prod_idx;
+ /* Unmasked consumer index, Incremented on cq polling */
+ u64 cons_idx;
+ /* Unmasked index of next-to-complete (from HW) shadow WQE */
+ u64 next_to_complete_idx;
+ /* queue size in wqes */
+ u32 length;
+ /* distance between elements in bytes */
+ u32 stride;
+ /* ring buffer holding wqes */
+ void *buffer;
+};
+
+static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride)
+{
+ queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL);
+ if (!queue->buffer)
+ return -ENOMEM;
+
+ queue->length = length;
+ queue->stride = stride;
+
+ return 0;
+}
+
+static inline void destroy_shadow_queue(struct shadow_queue *queue)
+{
+ kvfree(queue->buffer);
+}
+
+static inline bool shadow_queue_full(struct shadow_queue *queue)
+{
+ return (queue->prod_idx - queue->cons_idx) >= queue->length;
+}
+
+static inline bool shadow_queue_empty(struct shadow_queue *queue)
+{
+ return queue->prod_idx == queue->cons_idx;
+}
+
+static inline void *
+shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index)
+{
+ u32 index = unmasked_index % queue->length;
+
+ return ((u8 *)queue->buffer + index * queue->stride);
+}
+
+static inline void *
+shadow_queue_producer_entry(struct shadow_queue *queue)
+{
+ return shadow_queue_get_element(queue, queue->prod_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+{
+ if (queue->cons_idx == queue->next_to_complete_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->cons_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_complete(struct shadow_queue *queue)
+{
+ if (queue->next_to_complete_idx == queue->prod_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->next_to_complete_idx);
+}
+
+static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
+{
+ queue->prod_idx++;
+}
+
+static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
+{
+ queue->cons_idx++;
+}
+
+static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue)
+{
+ queue->next_to_complete_idx++;
+}
+
+#endif
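
To make the index discipline above concrete: prod_idx advances when a WQE is posted, next_to_complete_idx advances when a hardware CQE is matched to the shadow entry, and cons_idx advances when poll_cq hands the completion to the caller; all three are unmasked and wrap only via the modulo in shadow_queue_get_element(). A toy userspace model of that progression (assumes a fixed length and no locking):

#include <stdio.h>
#include <stdint.h>

#define QLEN 4

/* Toy model of the shadow queue indices; the real driver also stores
 * per-WQE metadata in the ring slots.
 */
struct toy_shadow_queue {
	uint64_t prod_idx;		/* advanced on WQE post */
	uint64_t next_to_complete_idx;	/* advanced on HW CQE */
	uint64_t cons_idx;		/* advanced on poll_cq */
};

static int full(const struct toy_shadow_queue *q)
{
	return (q->prod_idx - q->cons_idx) >= QLEN;
}

int main(void)
{
	struct toy_shadow_queue q = { 0 };

	for (int i = 0; i < 3 && !full(&q); i++)
		q.prod_idx++;			/* three WQEs posted */
	q.next_to_complete_idx++;		/* one HW completion seen */
	while (q.cons_idx < q.next_to_complete_idx)
		q.cons_idx++;			/* one work completion polled */

	printf("posted=%llu completed=%llu polled=%llu\n",
	       (unsigned long long)q.prod_idx,
	       (unsigned long long)q.next_to_complete_idx,
	       (unsigned long long)q.cons_idx);
	return 0;
}
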
diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c
new file mode 100644
index 000000000000..1813567d3b16
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wr.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define MAX_WR_SGL_NUM (2)
+
+static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
+ struct gdma_wqe_request wqe_req = {0};
+ struct ud_rq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (shadow_queue_full(&qp->shadow_rq))
+ return -EINVAL;
+
+ if (wr->num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ gdma_sgl[i].address = wr->sg_list[i].addr;
+ gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
+ gdma_sgl[i].size = wr->sg_list[i].length;
+ }
+ wqe_req.num_sge = wr->num_sge;
+ wqe_req.sgl = gdma_sgl;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_RECV;
+ shadow_wqe->header.wr_id = wr->wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_rq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_recv_ud(qp, wr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
+
+static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah);
+ struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1];
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_wqe_request wqe_req = {0};
+ struct rdma_send_oob send_oob = {0};
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ qp->port, qp->ibqp.qp_num);
+ return -EINVAL;
+ }
+
+ if (wr->wr.opcode != IB_WR_SEND)
+ return -EINVAL;
+
+ if (shadow_queue_full(&qp->shadow_sq))
+ return -EINVAL;
+
+ if (wr->wr.num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ gdma_sgl[0].address = ah->dma_handle;
+ gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey;
+ gdma_sgl[0].size = sizeof(struct mana_ib_av);
+ for (i = 0; i < wr->wr.num_sge; ++i) {
+ gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr;
+ gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey;
+ gdma_sgl[i + 1].size = wr->wr.sg_list[i].length;
+ }
+
+ wqe_req.num_sge = wr->wr.num_sge + 1;
+ wqe_req.sgl = gdma_sgl;
+ wqe_req.inline_oob_size = sizeof(struct rdma_send_oob);
+ wqe_req.inline_oob_data = &send_oob;
+ wqe_req.flags = GDMA_WR_OOB_IN_SGL;
+ wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu));
+
+ send_oob.wqe_type = WQE_TYPE_UD_SEND;
+ send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE);
+ send_oob.signaled = !!(wr->wr.send_flags & IB_SEND_SIGNALED);
+ send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
+ send_oob.psn = qp->ud_qp.sq_psn;
+ send_oob.ssn_or_rqpn = wr->remote_qpn;
+ send_oob.ud_send.remote_qkey =
+ qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ qp->ud_qp.sq_psn++;
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_SEND;
+ shadow_wqe->header.wr_id = wr->wr.wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_sq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ int err = 0;
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_send_ud(qp, ud_wr(wr));
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
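
A note on the UD send path above: the work request's scatter-gather list always carries the address vector as entry zero (the AH's 64-byte AV from the DMA pool, keyed by the PD's local DMA lkey), with the caller's SGEs following at offset one, which is why num_sge is posted as wr->wr.num_sge + 1. A standalone sketch of that layout (hypothetical struct and values, not the GDMA definitions):

#include <stdio.h>
#include <stdint.h>

#define MAX_WR_SGL_NUM 2
#define AV_SIZE 64	/* MANA_AV_BUFFER_SIZE in the driver */

/* Simplified stand-in for struct gdma_sge, illustrative only */
struct toy_sge {
	uint64_t address;
	uint32_t mem_key;
	uint32_t size;
};

int main(void)
{
	struct toy_sge sgl[MAX_WR_SGL_NUM + 1];
	const struct toy_sge user_sge = { 0x1000, 0x42, 512 };	/* a caller-provided SGE */
	int num_user = 1;

	/* slot 0 always points at the 64-byte address vector */
	sgl[0].address = 0xab000000;	/* stands in for ah->dma_handle */
	sgl[0].mem_key = 0;		/* stands in for pd->local_dma_lkey */
	sgl[0].size = AV_SIZE;

	/* caller SGEs follow, shifted by one; hence num_sge = wr->wr.num_sge + 1 */
	for (int i = 0; i < num_user; i++)
		sgl[i + 1] = user_sge;

	printf("posting %d SGEs (1 AV + %d data)\n", num_user + 1, num_user);
	return 0;
}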