diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 21:08:40 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-26 21:08:40 -0700 |
commit | 780d8ce7162818cfe03e9a5e23b3af192a1d37bc (patch) | |
tree | 7af6427428434175118ee79e9a08aa66e750efa0 | |
parent | 090b39af591cdde897664dfa5d3d5b0c78a197fb (diff) | |
parent | 9c477178a0a187c4718c228cc6e0692564811441 (diff) | |
download | linux-stable-780d8ce7162818cfe03e9a5e23b3af192a1d37bc.tar.gz linux-stable-780d8ce7162818cfe03e9a5e23b3af192a1d37bc.tar.bz2 linux-stable-780d8ce7162818cfe03e9a5e23b3af192a1d37bc.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"Small collection of incremental improvement patches:
- Minor code cleanup patches, comment improvements, etc from static
tools
- Clean the some of the kernel caps, reducing the historical stealth
uAPI leftovers
- Bug fixes and minor changes for rdmavt, hns, rxe, irdma
- Remove unimplemented cruft from rxe
- Reorganize UMR QP code in mlx5 to avoid going through the IB verbs
layer
- flush_workqueue(system_unbound_wq) removal
- Ensure rxe waits for objects to be unused before allowing the core
to free them
- Several rc quality bug fixes for hfi1"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (67 commits)
RDMA/rtrs-clt: Fix one kernel-doc comment
RDMA/hfi1: Remove all traces of diagpkt support
RDMA/hfi1: Consolidate software versions
RDMA/hfi1: Remove pointless driver version
RDMA/hfi1: Fix potential integer multiplication overflow errors
RDMA/hfi1: Prevent panic when SDMA is disabled
RDMA/hfi1: Prevent use of lock before it is initialized
RDMA/rxe: Fix an error handling path in rxe_get_mcg()
IB/core: Fix typo in comment
RDMA/core: Fix typo in comment
IB/hf1: Fix typo in comment
IB/qib: Fix typo in comment
IB/iser: Fix typo in comment
RDMA/mlx4: Avoid flush_scheduled_work() usage
IB/isert: Avoid flush_scheduled_work() usage
RDMA/mlx5: Remove duplicate pointer assignment in mlx5_ib_alloc_implicit_mr()
RDMA/qedr: Remove unnecessary synchronize_irq() before free_irq()
RDMA/hns: Use hr_reg_read() instead of remaining roce_get_xxx()
RDMA/hns: Use hr_reg_xxx() instead of remaining roce_set_xxx()
RDMA/irdma: Add SW mechanism to generate completions on error
...
88 files changed, 1968 insertions, 2002 deletions
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 4deb60a3b43f..d275db195f1a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq; struct workqueue_struct *ib_comp_unbound_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); +static struct workqueue_struct *ib_unreg_wq; /* * Each of the three rwsem locks (devices, clients, client_data) protects the @@ -1602,7 +1603,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev) WARN_ON(!refcount_read(&ib_dev->refcount)); WARN_ON(!ib_dev->ops.dealloc_driver); get_device(&ib_dev->dev); - if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work)) + if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work)) put_device(&ib_dev->dev); } EXPORT_SYMBOL(ib_unregister_device_queued); @@ -2751,27 +2752,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { static int __init ib_core_init(void) { - int ret; + int ret = -ENOMEM; ib_wq = alloc_workqueue("infiniband", 0, 0); if (!ib_wq) return -ENOMEM; + ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!ib_unreg_wq) + goto err; + ib_comp_wq = alloc_workqueue("ib-comp-wq", WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); - if (!ib_comp_wq) { - ret = -ENOMEM; - goto err; - } + if (!ib_comp_wq) + goto err_unbound; ib_comp_unbound_wq = alloc_workqueue("ib-comp-unb-wq", WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE); - if (!ib_comp_unbound_wq) { - ret = -ENOMEM; + if (!ib_comp_unbound_wq) goto err_comp; - } ret = class_register(&ib_class); if (ret) { @@ -2831,6 +2833,8 @@ err_comp_unbound: destroy_workqueue(ib_comp_unbound_wq); err_comp: destroy_workqueue(ib_comp_wq); +err_unbound: + destroy_workqueue(ib_unreg_wq); err: destroy_workqueue(ib_wq); return ret; @@ -2852,7 +2856,7 @@ static void __exit ib_core_cleanup(void) destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); - flush_workqueue(system_unbound_wq); + destroy_workqueue(ib_unreg_wq); WARN_ON(!xa_empty(&clients)); WARN_ON(!xa_empty(&devices)); } diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index ca24ce34da76..b92358f606d0 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1739,7 +1739,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (!device) return -EINVAL; - if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) { + if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) { ib_device_put(device); return -EINVAL; } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8dc7d1f4b35d..003e504feca2 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1034,10 +1034,9 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, struct netlink_ext_ack *extack) { unsigned long flags; - struct ib_sa_query *query; + struct ib_sa_query *query = NULL, *iter; struct ib_mad_send_buf *send_buf; struct ib_mad_send_wc mad_send_wc; - int found = 0; int ret; if ((nlh->nlmsg_flags & NLM_F_REQUEST) || @@ -1045,20 +1044,21 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, return -EPERM; spin_lock_irqsave(&ib_nl_request_lock, flags); - list_for_each_entry(query, &ib_nl_request_list, list) { + list_for_each_entry(iter, &ib_nl_request_list, list) { /* * If the query is cancelled, let the timeout routine * take care of it. */ - if (nlh->nlmsg_seq == query->seq) { - found = !ib_sa_query_cancelled(query); - if (found) - list_del(&query->list); + if (nlh->nlmsg_seq == iter->seq) { + if (!ib_sa_query_cancelled(iter)) { + list_del(&iter->list); + query = iter; + } break; } } - if (!found) { + if (!query) { spin_unlock_irqrestore(&ib_nl_request_lock, flags); goto resp_out; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index aead24c1a682..186ed8859920 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -455,7 +455,7 @@ retry: break; } } - /* upon sucesss lock should stay on hold for the callee */ + /* upon success lock should stay on hold for the callee */ if (!ret) ret = dma_index - start_idx; else diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6b6393176b3c..046376bd68e2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -337,7 +337,7 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext, resp->hw_ver = attr->hw_ver; resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; - resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); + resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge); resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a9819c40a140..e54b3f1b730e 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -281,7 +281,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, } rdma_restrack_add(&pd->res); - if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) + if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; else mr_access_flags |= IB_ACCESS_LOCAL_WRITE; @@ -308,7 +308,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, pd->__internal_mr = mr; - if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) + if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)) pd->local_dma_lkey = pd->__internal_mr->lkey; if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) @@ -2131,8 +2131,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_mr *mr; if (access_flags & IB_ACCESS_ON_DEMAND) { - if (!(pd->device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING)) { + if (!(pd->device->attrs.kernel_cap_flags & + IBK_ON_DEMAND_PAGING)) { pr_debug("ODP support not available\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 3224f18a66e5..989edc789633 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -146,13 +146,13 @@ int bnxt_re_query_device(struct ib_device *ibdev, | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID - | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_RESIZE_MAX_WR | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_N_NOTIFY_CQ | IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_MEM_MGT_EXTENSIONS; + ib_attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; ib_attr->max_send_sge = dev_attr->max_qp_sges; ib_attr->max_recv_sge = dev_attr->max_qp_sges; ib_attr->max_sge_rd = dev_attr->max_qp_sges; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 12f33467c672..50cb2259bf87 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -314,7 +314,6 @@ enum db_state { struct c4iw_dev { struct ib_device ibdev; struct c4iw_rdev rdev; - u32 device_cap_flags; struct xarray cqs; struct xarray qps; struct xarray mrs; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 89f36a3a9af0..246b739ddb2b 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -269,7 +269,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro dev->rdev.lldi.ports[0]->dev_addr); props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type); props->fw_ver = dev->rdev.lldi.fw_vers; - props->device_cap_flags = dev->device_cap_flags; + props->device_cap_flags = IB_DEVICE_MEM_WINDOW; + props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; + if (fastreg_support) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; props->page_size_cap = T4_PAGESIZE_MASK; props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; @@ -529,9 +532,6 @@ void c4iw_register_device(struct work_struct *work) pr_debug("c4iw_dev %p\n", dev); addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr); - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; - if (fastreg_support) - dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.local_dma_lkey = 0; dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 995991d9709d..166ad6b828dc 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -137,61 +137,6 @@ #define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \ HFI1_USER_SWMINOR) -#ifndef HFI1_KERN_TYPE -#define HFI1_KERN_TYPE 0 -#endif - -/* - * Similarly, this is the kernel version going back to the user. It's - * slightly different, in that we want to tell if the driver was built as - * part of a Intel release, or from the driver from openfabrics.org, - * kernel.org, or a standard distribution, for support reasons. - * The high bit is 0 for non-Intel and 1 for Intel-built/supplied. - * - * It's returned by the driver to the user code during initialization in the - * spi_sw_version field of hfi1_base_info, so the user code can in turn - * check for compatibility with the kernel. -*/ -#define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION) - -/* - * Define the driver version number. This is something that refers only - * to the driver itself, not the software interfaces it supports. - */ -#ifndef HFI1_DRIVER_VERSION_BASE -#define HFI1_DRIVER_VERSION_BASE "0.9-294" -#endif - -/* create the final driver version string */ -#ifdef HFI1_IDSTR -#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR -#else -#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE -#endif - -/* - * Diagnostics can send a packet by writing the following - * struct to the diag packet special file. - * - * This allows a custom PBC qword, so that special modes and deliberate - * changes to CRCs can be used. - */ -#define _DIAG_PKT_VERS 1 -struct diag_pkt { - __u16 version; /* structure version */ - __u16 unit; /* which device */ - __u16 sw_index; /* send sw index to use */ - __u16 len; /* data length, in bytes */ - __u16 port; /* port number */ - __u16 unused; - __u32 flags; /* call flags */ - __u64 data; /* user data pointer */ - __u64 pbc; /* PBC for the packet */ -}; - -/* diag_pkt flags */ -#define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */ - /* * The next set of defines are for packet headers, and chip register * and memory bits that are visible to and/or used by user-mode software. diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index e2c634af40e9..8e71bef9d982 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -29,12 +29,6 @@ #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt -/* - * The size has to be longer than this string, so we can append - * board/chip information to it in the initialization code. - */ -const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n"; - DEFINE_MUTEX(hfi1_mutex); /* general driver use */ unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU; diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c index e8ed05516bf2..7741a1d69097 100644 --- a/drivers/infiniband/hw/hfi1/efivar.c +++ b/drivers/infiniband/hw/hfi1/efivar.c @@ -72,7 +72,7 @@ static int read_efi_var(const char *name, unsigned long *size, * is in the EFIVAR_FS code and may not be compiled in. * However, even that is insufficient since it does not cover * EFI_BUFFER_TOO_SMALL which could be an important return. - * For now, just split out succces or not found. + * For now, just split out success or not found. */ ret = status == EFI_SUCCESS ? 0 : status == EFI_NOT_FOUND ? -ENOENT : diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1783a6ea5427..2e4cf2b11653 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -265,6 +265,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) unsigned long dim = from->nr_segs; int idx; + if (!HFI1_CAP_IS_KSET(SDMA)) + return -EINVAL; idx = srcu_read_lock(&fd->pq_srcu); pq = srcu_dereference(fd->pq, &fd->pq_srcu); if (!cq || !pq) { @@ -1220,7 +1222,7 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; - binfo.sw_version = HFI1_KERN_SWVERSION; + binfo.sw_version = HFI1_USER_SWVERSION; binfo.bthqp = RVT_KDETH_QP_PREFIX; binfo.jkey = uctxt->jkey; /* diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 4436ed41547c..436372b31431 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -489,7 +489,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd) u16 shift, mult; u64 src; u32 current_egress_rate; /* Mbits /sec */ - u32 max_pkt_time; + u64 max_pkt_time; /* * max_pkt_time is the maximum packet egress time in units * of the fabric clock period 1/(805 MHz). diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index f07d328689d3..a95b654f5254 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1288,11 +1288,13 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) kvfree(sde->tx_ring); sde->tx_ring = NULL; } - spin_lock_irq(&dd->sde_map_lock); - sdma_map_free(rcu_access_pointer(dd->sdma_map)); - RCU_INIT_POINTER(dd->sdma_map, NULL); - spin_unlock_irq(&dd->sde_map_lock); - synchronize_rcu(); + if (rcu_access_pointer(dd->sdma_map)) { + spin_lock_irq(&dd->sde_map_lock); + sdma_map_free(rcu_access_pointer(dd->sdma_map)); + RCU_INIT_POINTER(dd->sdma_map, NULL); + spin_unlock_irq(&dd->sde_map_lock); + synchronize_rcu(); + } kfree(dd->per_sdma); dd->per_sdma = NULL; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 99d0743133ca..6988f6f21bde 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1300,8 +1300,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | - IB_DEVICE_MEM_MGT_EXTENSIONS | - IB_DEVICE_RDMA_NETDEV_OPA; + IB_DEVICE_MEM_MGT_EXTENSIONS; + rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA; rdi->dparms.props.page_size_cap = PAGE_SIZE; rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; rdi->dparms.props.vendor_part_id = dd->pcidev->device; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 3083d6db1d68..2855e9ad4b32 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -106,16 +106,6 @@ enum { SERV_TYPE_XRC = 5, }; -enum hns_roce_qp_state { - HNS_ROCE_QP_STATE_RST, - HNS_ROCE_QP_STATE_INIT, - HNS_ROCE_QP_STATE_RTR, - HNS_ROCE_QP_STATE_RTS, - HNS_ROCE_QP_STATE_SQD, - HNS_ROCE_QP_STATE_ERR, - HNS_ROCE_QP_NUM_STATE, -}; - enum hns_roce_event { HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01, HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02, @@ -139,8 +129,6 @@ enum hns_roce_event { HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17, }; -#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 - enum { HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), @@ -535,6 +523,11 @@ struct hns_roce_cmd_context { u16 busy; }; +enum hns_roce_cmdq_state { + HNS_ROCE_CMDQ_STATE_NORMAL, + HNS_ROCE_CMDQ_STATE_FATAL_ERR, +}; + struct hns_roce_cmdq { struct dma_pool *pool; struct semaphore poll_sem; @@ -554,6 +547,7 @@ struct hns_roce_cmdq { * close device, switch into poll mode(non event mode) */ u8 use_events; + enum hns_roce_cmdq_state state; }; struct hns_roce_cmd_mailbox { @@ -657,6 +651,11 @@ struct hns_roce_ceqe { __le32 rsv[15]; }; +#define CEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ceqe, h, l) + +#define CEQE_CQN CEQE_FIELD_LOC(23, 0) +#define CEQE_OWNER CEQE_FIELD_LOC(31, 31) + struct hns_roce_aeqe { __le32 asyn; union { @@ -676,6 +675,13 @@ struct hns_roce_aeqe { __le32 rsv[12]; }; +#define AEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_aeqe, h, l) + +#define AEQE_EVENT_TYPE AEQE_FIELD_LOC(7, 0) +#define AEQE_SUB_TYPE AEQE_FIELD_LOC(15, 8) +#define AEQE_OWNER AEQE_FIELD_LOC(31, 31) +#define AEQE_EVENT_QUEUE_NUM AEQE_FIELD_LOC(55, 32) + struct hns_roce_eq { struct hns_roce_dev *hr_dev; void __iomem *db_reg; @@ -725,7 +731,6 @@ struct hns_roce_caps { u32 num_pi_qps; u32 reserved_qps; int num_qpc_timer; - int num_cqc_timer; u32 num_srqs; u32 max_wqes; u32 max_srq_wrs; @@ -1191,7 +1196,6 @@ void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n); void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n); bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq, struct ib_cq *ib_cq); -enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq); void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 2b0cef17ad45..ba3c742258ef 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -149,8 +149,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr, aseg->cmp_data = 0; } - roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); } static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, @@ -271,8 +270,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr, dseg += sizeof(struct hns_roce_v2_rc_send_wqe); if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) { - roce_set_bit(rc_sq_wqe->byte_20, - V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0); + hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE); for (i = 0; i < wr->num_sge; i++) { memcpy(dseg, ((void *)wr->sg_list[i].addr), @@ -280,17 +278,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr, dseg += wr->sg_list[i].length; } } else { - roce_set_bit(rc_sq_wqe->byte_20, - V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1); + hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE); ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len); if (ret) return ret; - roce_set_field(rc_sq_wqe->byte_16, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, - curr_idx - *sge_idx); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx); } *sge_idx = curr_idx; @@ -309,12 +303,10 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, int j = 0; int i; - roce_set_field(rc_sq_wqe->byte_20, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - (*sge_ind) & (qp->sge.sge_cnt - 1)); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, + (*sge_ind) & (qp->sge.sge_cnt - 1)); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE, !!(wr->send_flags & IB_SEND_INLINE)); if (wr->send_flags & IB_SEND_INLINE) return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind); @@ -339,9 +331,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, valid_num_sge - HNS_ROCE_SGE_IN_WQE); } - roce_set_field(rc_sq_wqe->byte_16, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge); return 0; } @@ -412,8 +402,7 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, ud_sq_wqe->immtdata = get_immtdata(wr); - roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M, - V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op)); return 0; } @@ -424,21 +413,15 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, struct ib_device *ib_dev = ah->ibah.device; struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); - roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); - - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, - V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, - V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass); - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, - V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel); if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) return -EINVAL; - roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, - V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl); ud_sq_wqe->sgid_index = ah->av.gid_index; @@ -448,10 +431,8 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) return 0; - roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S, - ah->av.vlan_en); - roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, - V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id); return 0; } @@ -476,27 +457,19 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, ud_sq_wqe->msg_len = cpu_to_le32(msg_len); - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE, !!(wr->send_flags & IB_SEND_SIGNALED)); - - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S, + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE, !!(wr->send_flags & IB_SEND_SOLICITED)); - roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M, - V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn); - - roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); - - roce_set_field(ud_sq_wqe->byte_20, - V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, - V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - curr_idx & (qp->sge.sge_cnt - 1)); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX, + curr_idx & (qp->sge.sge_cnt - 1)); ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? qp->qkey : ud_wr(wr)->remote_qkey); - roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, - V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn); ret = fill_ud_av(ud_sq_wqe, ah); if (ret) @@ -516,8 +489,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, dma_wmb(); *sge_idx = curr_idx; - roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); + hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit); return 0; } @@ -553,7 +525,7 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev, ret = -EOPNOTSUPP; break; case IB_WR_LOCAL_INV: - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); + hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_SO); fallthrough; case IB_WR_SEND_WITH_INV: rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); @@ -565,11 +537,11 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev, if (unlikely(ret)) return ret; - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op)); return ret; } + static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, @@ -590,13 +562,13 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, if (WARN_ON(ret)) return ret; - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE, (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S, + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE, (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || @@ -616,8 +588,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, dma_wmb(); *sge_idx = curr_idx; - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, - owner_bit); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit); return ret; } @@ -682,14 +653,11 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; /* All kinds of DirectWQE have the same header field layout */ - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1); - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M, - V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl); - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M, - V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, - qp->sl >> HNS_ROCE_SL_SHIFT); - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M, - V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); + hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H, + qp->sl >> HNS_ROCE_SL_SHIFT); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head); hns_roce_write512(hr_dev, wqe, qp->sq.db_reg); } @@ -1265,6 +1233,16 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) return tail == priv->cmq.csq.head; } +static void update_cmdq_status(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + + if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT || + handle->rinfo.instance_state == HNS_ROCE_STATE_INIT) + hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR; +} + static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { @@ -1296,7 +1274,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, } while (++timeout < priv->cmq.tx_timeout); if (hns_roce_cmq_csq_done(hr_dev)) { - for (ret = 0, i = 0; i < num; i++) { + ret = 0; + for (i = 0; i < num; i++) { /* check the result of hardware write back */ desc[i] = csq->desc[tail++]; if (tail == csq->desc_num) @@ -1318,6 +1297,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, csq->head, tail); csq->head = tail; + update_cmdq_status(hr_dev); + ret = -EAGAIN; } @@ -1332,6 +1313,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, bool busy; int ret; + if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR) + return -EIO; + if (!v2_chk_mbox_is_avail(hr_dev, &busy)) return busy ? -EBUSY : 0; @@ -1499,7 +1483,7 @@ static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id) if (ret) continue; - if (roce_get_bit(resp->func_done, FUNC_CLEAR_RST_FUN_DONE_S)) { + if (hr_reg_read(resp, FUNC_CLEAR_RST_FUN_DONE)) { if (vf_id == 0) hr_dev->is_reset = true; return; @@ -1510,7 +1494,7 @@ out: hns_roce_func_clr_rst_proc(hr_dev, ret, fclr_write_fail_flag); } -static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) +static int hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) { enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES; struct hns_roce_cmq_desc desc[2]; @@ -1521,17 +1505,29 @@ static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id) desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false); hr_reg_write(req_a, FUNC_RES_A_VF_ID, vf_id); - hns_roce_cmq_send(hr_dev, desc, 2); + + return hns_roce_cmq_send(hr_dev, desc, 2); } static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) { + int ret; int i; + if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR) + return; + for (i = hr_dev->func_num - 1; i >= 0; i--) { __hns_roce_function_clear(hr_dev, i); - if (i != 0) - hns_roce_free_vf_resource(hr_dev, i); + + if (i == 0) + continue; + + ret = hns_roce_free_vf_resource(hr_dev, i); + if (ret) + ibdev_err(&hr_dev->ib_dev, + "failed to free vf resource, vf_id = %d, ret = %d.\n", + i, ret); } } @@ -1757,17 +1753,16 @@ static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, swt = (struct hns_roce_vf_switch *)desc.data; hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL); - roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M, - VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id); + hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) return ret; desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN); desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1); - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0); - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1); + hr_reg_enable(swt, VF_SWITCH_ALW_LPBK); + hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK); + hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD); return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -1947,7 +1942,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; - caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM; + caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM; caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; @@ -2243,7 +2238,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); - caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); caps->num_aeq_vectors = resp_a->num_aeq_vectors; @@ -2270,87 +2264,39 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) ctx_hop_num = resp_b->ctx_hop_num; pbl_hop_num = resp_b->pbl_hop_num; - caps->num_pds = 1 << roce_get_field(resp_c->cap_flags_num_pds, - V2_QUERY_PF_CAPS_C_NUM_PDS_M, - V2_QUERY_PF_CAPS_C_NUM_PDS_S); - caps->flags = roce_get_field(resp_c->cap_flags_num_pds, - V2_QUERY_PF_CAPS_C_CAP_FLAGS_M, - V2_QUERY_PF_CAPS_C_CAP_FLAGS_S); + caps->num_pds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_PDS); + + caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS); caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) << HNS_ROCE_CAP_FLAGS_EX_SHIFT; - caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs, - V2_QUERY_PF_CAPS_C_NUM_CQS_M, - V2_QUERY_PF_CAPS_C_NUM_CQS_S); - caps->gid_table_len[0] = roce_get_field(resp_c->max_gid_num_cqs, - V2_QUERY_PF_CAPS_C_MAX_GID_M, - V2_QUERY_PF_CAPS_C_MAX_GID_S); - - caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth, - V2_QUERY_PF_CAPS_C_CQ_DEPTH_M, - V2_QUERY_PF_CAPS_C_CQ_DEPTH_S); - caps->num_mtpts = 1 << roce_get_field(resp_c->num_mrws, - V2_QUERY_PF_CAPS_C_NUM_MRWS_M, - V2_QUERY_PF_CAPS_C_NUM_MRWS_S); - caps->num_qps = 1 << roce_get_field(resp_c->ord_num_qps, - V2_QUERY_PF_CAPS_C_NUM_QPS_M, - V2_QUERY_PF_CAPS_C_NUM_QPS_S); - caps->max_qp_init_rdma = roce_get_field(resp_c->ord_num_qps, - V2_QUERY_PF_CAPS_C_MAX_ORD_M, - V2_QUERY_PF_CAPS_C_MAX_ORD_S); + caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS); + caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID); + caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH); + caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS); + caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS); + caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD); caps->max_qp_dest_rdma = caps->max_qp_init_rdma; caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth); - caps->num_srqs = 1 << roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_NUM_SRQS_M, - V2_QUERY_PF_CAPS_D_NUM_SRQS_S); - caps->cong_type = roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_CONG_TYPE_M, - V2_QUERY_PF_CAPS_D_CONG_TYPE_S); - caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth); - caps->ceqe_depth = 1 << roce_get_field(resp_d->num_ceqs_ceq_depth, - V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M, - V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S); - caps->num_comp_vectors = roce_get_field(resp_d->num_ceqs_ceq_depth, - V2_QUERY_PF_CAPS_D_NUM_CEQS_M, - V2_QUERY_PF_CAPS_D_NUM_CEQS_S); - - caps->aeqe_depth = 1 << roce_get_field(resp_d->arm_st_aeq_depth, - V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M, - V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S); - caps->default_aeq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth, - V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M, - V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S); - caps->default_ceq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth, - V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M, - V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S); - caps->reserved_pds = roce_get_field(resp_d->num_uars_rsv_pds, - V2_QUERY_PF_CAPS_D_RSV_PDS_M, - V2_QUERY_PF_CAPS_D_RSV_PDS_S); - caps->num_uars = 1 << roce_get_field(resp_d->num_uars_rsv_pds, - V2_QUERY_PF_CAPS_D_NUM_UARS_M, - V2_QUERY_PF_CAPS_D_NUM_UARS_S); - caps->reserved_qps = roce_get_field(resp_d->rsv_uars_rsv_qps, - V2_QUERY_PF_CAPS_D_RSV_QPS_M, - V2_QUERY_PF_CAPS_D_RSV_QPS_S); - caps->reserved_uars = roce_get_field(resp_d->rsv_uars_rsv_qps, - V2_QUERY_PF_CAPS_D_RSV_UARS_M, - V2_QUERY_PF_CAPS_D_RSV_UARS_S); - caps->reserved_mrws = roce_get_field(resp_e->chunk_size_shift_rsv_mrws, - V2_QUERY_PF_CAPS_E_RSV_MRWS_M, - V2_QUERY_PF_CAPS_E_RSV_MRWS_S); - caps->chunk_sz = 1 << roce_get_field(resp_e->chunk_size_shift_rsv_mrws, - V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M, - V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S); - caps->reserved_cqs = roce_get_field(resp_e->rsv_cqs, - V2_QUERY_PF_CAPS_E_RSV_CQS_M, - V2_QUERY_PF_CAPS_E_RSV_CQS_S); - caps->reserved_srqs = roce_get_field(resp_e->rsv_srqs, - V2_QUERY_PF_CAPS_E_RSV_SRQS_M, - V2_QUERY_PF_CAPS_E_RSV_SRQS_S); - caps->reserved_lkey = roce_get_field(resp_e->rsv_lkey, - V2_QUERY_PF_CAPS_E_RSV_LKEYS_M, - V2_QUERY_PF_CAPS_E_RSV_LKEYS_S); + caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS); + caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE); + caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth); + caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH); + caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS); + caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH); + caps->default_aeq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_AEQ_ARM_ST); + caps->default_ceq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_CEQ_ARM_ST); + caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS); + caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS); + caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS); + caps->reserved_uars = hr_reg_read(resp_d, PF_CAPS_D_RSV_UARS); + + caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS); + caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT); + caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS); + caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS); + caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS); caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt); caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period); caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt); @@ -2365,15 +2311,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqe_hop_num = pbl_hop_num; caps->srqwqe_hop_num = pbl_hop_num; caps->idx_hop_num = pbl_hop_num; - caps->wqe_sq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M, - V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S); - caps->wqe_sge_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M, - V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S); - caps->wqe_rq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs, - V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M, - V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S); + caps->wqe_sq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_SQWQE_HOP_NUM); + caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM); + caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM); return 0; } @@ -3000,6 +2940,9 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout, mb_st = (struct hns_roce_mbox_status *)desc.data; end = msecs_to_jiffies(timeout) + jiffies; while (v2_chk_mbox_is_avail(hr_dev, &busy)) { + if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR) + return -EIO; + status = 0; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); @@ -3103,10 +3046,8 @@ static int config_sgid_table(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false); - roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M, - CFG_SGID_TB_TABLE_IDX_S, gid_index); - roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M, - CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type); + hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index); + hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type); copy_gid(&sgid_tb->vf_sgid_l, gid); @@ -3141,19 +3082,14 @@ static int config_gmv_table(struct hns_roce_dev *hr_dev, copy_gid(&tb_a->vf_sgid_l, gid); - roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M, - CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type); - roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S, - vlan_id < VLAN_CFI_MASK); - roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M, - CFG_GMV_TB_VF_VLAN_ID_S, vlan_id); + hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type); + hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK); + hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id); tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac); - roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M, - CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]); - roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M, - CFG_GMV_TB_SGID_IDX_S, gid_index); + hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]); + hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index); return hns_roce_cmq_send(hr_dev, desc, 2); } @@ -3202,10 +3138,8 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, reg_smac_l = *(u32 *)(&addr[0]); reg_smac_h = *(u16 *)(&addr[4]); - roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M, - CFG_SMAC_TB_IDX_S, phy_port); - roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M, - CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); + hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port); + hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h); smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l); return hns_roce_cmq_send(hr_dev, &desc, 1); @@ -3234,21 +3168,15 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, mpt_entry->pbl_size = cpu_to_le32(mr->npages); mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3); - roce_set_field(mpt_entry->byte_48_mode_ba, - V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S, - upper_32_bits(pbl_ba >> 3)); + hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0])); - roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M, - V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0])); + hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0])); mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1])); - roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M, - V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1])); - roce_set_field(mpt_entry->byte_64_buf_pa1, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); + hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1])); + hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); return 0; } @@ -3257,7 +3185,6 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr) { struct hns_roce_v2_mpt_entry *mpt_entry; - int ret; mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); @@ -3296,9 +3223,7 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD); - ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); - - return ret; + return set_mtpt_pbl(hr_dev, mpt_entry, mr); } static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, @@ -3309,24 +3234,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, u32 mr_access_flags = mr->access; int ret = 0; - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mr->pd); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); if (flags & IB_MR_REREG_ACCESS) { - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, - V2_MPT_BYTE_8_BIND_EN_S, + hr_reg_write(mpt_entry, MPT_BIND_EN, (mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, - V2_MPT_BYTE_8_ATOMIC_EN_S, + hr_reg_write(mpt_entry, MPT_ATOMIC_EN, mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, + hr_reg_write(mpt_entry, MPT_RR_EN, mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, + hr_reg_write(mpt_entry, MPT_RW_EN, mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, + hr_reg_write(mpt_entry, MPT_LW_EN, mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0); } @@ -3357,37 +3277,28 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev, return -ENOBUFS; } - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, - V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mr->pd); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); + + hr_reg_enable(mpt_entry, MPT_RA_EN); + hr_reg_enable(mpt_entry, MPT_R_INV_EN); + hr_reg_enable(mpt_entry, MPT_L_INV_EN); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); + hr_reg_enable(mpt_entry, MPT_FRE); + hr_reg_clear(mpt_entry, MPT_MR_MW); + hr_reg_enable(mpt_entry, MPT_BPD); + hr_reg_clear(mpt_entry, MPT_PA); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1); + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); mpt_entry->pbl_size = cpu_to_le32(mr->npages); mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3)); - roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M, - V2_MPT_BYTE_48_PBL_BA_H_S, - upper_32_bits(pbl_ba >> 3)); - - roce_set_field(mpt_entry->byte_64_buf_pa1, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); + hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3)); return 0; } @@ -3399,36 +3310,29 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mw->pdn); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, - V2_MPT_BYTE_4_PBL_HOP_NUM_S, - mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : - mw->pbl_hop_num); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); - - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1); - - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S, - mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE); + hr_reg_write(mpt_entry, MPT_PD, mw->pdn); + + hr_reg_enable(mpt_entry, MPT_R_INV_EN); + hr_reg_enable(mpt_entry, MPT_L_INV_EN); + hr_reg_enable(mpt_entry, MPT_LW_EN); - roce_set_field(mpt_entry->byte_64_buf_pa1, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, - mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); + hr_reg_enable(mpt_entry, MPT_MR_MW); + hr_reg_enable(mpt_entry, MPT_BPD); + hr_reg_clear(mpt_entry, MPT_PA); + hr_reg_write(mpt_entry, MPT_BQP, + mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1); mpt_entry->lkey = cpu_to_le32(mw->rkey); + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, + mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + mw->pbl_hop_num); + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, + mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET); + hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ, + mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET); + return 0; } @@ -4966,9 +4870,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, if (ret) return ret; - if (gid_attr) - is_udp = (gid_attr->gid_type == - IB_GID_TYPE_ROCE_UDP_ENCAP); + is_udp = (gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); } /* Only HIP08 needs to set the vlan_en bits in QPC */ @@ -5949,7 +5851,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) (eq->cons_index & (eq->entries - 1)) * eq->eqe_size); - return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ + return (hr_reg_read(aeqe, AEQE_OWNER) ^ !!(eq->cons_index & eq->entries)) ? aeqe : NULL; } @@ -5969,15 +5871,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, */ dma_rmb(); - event_type = roce_get_field(aeqe->asyn, - HNS_ROCE_V2_AEQE_EVENT_TYPE_M, - HNS_ROCE_V2_AEQE_EVENT_TYPE_S); - sub_type = roce_get_field(aeqe->asyn, - HNS_ROCE_V2_AEQE_SUB_TYPE_M, - HNS_ROCE_V2_AEQE_SUB_TYPE_S); - queue_num = roce_get_field(aeqe->event.queue_event.num, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + event_type = hr_reg_read(aeqe, AEQE_EVENT_TYPE); + sub_type = hr_reg_read(aeqe, AEQE_SUB_TYPE); + queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -6037,8 +5933,8 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) (eq->cons_index & (eq->entries - 1)) * eq->eqe_size); - return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ - (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; + return (hr_reg_read(ceqe, CEQE_OWNER) ^ + !!(eq->cons_index & eq->entries)) ? ceqe : NULL; } static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, @@ -6054,8 +5950,7 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, */ dma_rmb(); - cqn = roce_get_field(ceqe->comp, HNS_ROCE_V2_CEQE_COMP_CQN_M, - HNS_ROCE_V2_CEQE_COMP_CQN_S); + cqn = hr_reg_read(ceqe, CEQE_CQN); hns_roce_cq_completion(hr_dev, cqn); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 0d87b627601e..7ffb7824d268 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -41,7 +41,7 @@ #define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 #define HNS_ROCE_V2_MAX_SRQ_SGE 64 #define HNS_ROCE_V2_MAX_CQ_NUM 0x100000 -#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100 +#define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100 #define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 64 @@ -303,33 +303,6 @@ struct hns_roce_v2_cq_context { #define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0 -#define V2_CQC_BYTE_4_ARM_ST_S 6 -#define V2_CQC_BYTE_4_ARM_ST_M GENMASK(7, 6) - -#define V2_CQC_BYTE_4_CEQN_S 15 -#define V2_CQC_BYTE_4_CEQN_M GENMASK(23, 15) - -#define V2_CQC_BYTE_8_CQN_S 0 -#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0) - -#define V2_CQC_BYTE_16_CQE_HOP_NUM_S 30 -#define V2_CQC_BYTE_16_CQE_HOP_NUM_M GENMASK(31, 30) - -#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S 0 -#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M GENMASK(23, 0) - -#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S 0 -#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M GENMASK(23, 0) - -#define V2_CQC_BYTE_52_CQE_CNT_S 0 -#define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0) - -#define V2_CQC_BYTE_56_CQ_MAX_CNT_S 0 -#define V2_CQC_BYTE_56_CQ_MAX_CNT_M GENMASK(15, 0) - -#define V2_CQC_BYTE_56_CQ_PERIOD_S 16 -#define V2_CQC_BYTE_56_CQ_PERIOD_M GENMASK(31, 16) - #define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l) #define CQC_CQ_ST CQC_FIELD_LOC(1, 0) @@ -788,12 +761,15 @@ struct hns_roce_v2_mpt_entry { #define MPT_LKEY MPT_FIELD_LOC(223, 192) #define MPT_VA MPT_FIELD_LOC(287, 224) #define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) -#define MPT_PBL_BA MPT_FIELD_LOC(380, 320) +#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320) +#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352) #define MPT_BLK_MODE MPT_FIELD_LOC(381, 381) #define MPT_RSV0 MPT_FIELD_LOC(383, 382) -#define MPT_PA0 MPT_FIELD_LOC(441, 384) +#define MPT_PA0_L MPT_FIELD_LOC(415, 384) +#define MPT_PA0_H MPT_FIELD_LOC(441, 416) #define MPT_BOUND_VA MPT_FIELD_LOC(447, 442) -#define MPT_PA1 MPT_FIELD_LOC(505, 448) +#define MPT_PA1_L MPT_FIELD_LOC(479, 448) +#define MPT_PA1_H MPT_FIELD_LOC(505, 480) #define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506) #define MPT_RSV2 MPT_FIELD_LOC(507, 507) #define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508) @@ -899,48 +875,24 @@ struct hns_roce_v2_ud_send_wqe { u8 dgid[GID_LEN_V2]; }; -#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 -#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) - -#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 - -#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8 - -#define V2_UD_SEND_WQE_BYTE_4_SE_S 11 - -#define V2_UD_SEND_WQE_BYTE_16_PD_S 0 -#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0) - -#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24 -#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24) - -#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 -#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) - -#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16 -#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16) - -#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0 -#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0) - -#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0 -#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0) - -#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16 -#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16) - -#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24 -#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24) - -#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0 -#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0) - -#define V2_UD_SEND_WQE_BYTE_40_SL_S 20 -#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20) - -#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30 - -#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 +#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l) + +#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0) +#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7) +#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8) +#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11) +#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96) +#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120) +#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128) +#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176) +#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224) +#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256) +#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272) +#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280) +#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288) +#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308) +#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318) +#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319) struct hns_roce_v2_rc_send_wqe { __le32 byte_4; @@ -955,42 +907,23 @@ struct hns_roce_v2_rc_send_wqe { __le64 va; }; -#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0 -#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) - -#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5 -#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) - -#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13 -#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) - -#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15 -#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) - -#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7 - -#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8 - -#define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9 - -#define V2_RC_SEND_WQE_BYTE_4_SO_S 10 - -#define V2_RC_SEND_WQE_BYTE_4_SE_S 11 - -#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 - -#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31 - -#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 -#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) - -#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24 -#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24) - -#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 -#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) - -#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31 +#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l) + +#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0) +#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5) +#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13) +#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7) +#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8) +#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9) +#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10) +#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11) +#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12) +#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15) +#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31) +#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96) +#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120) +#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128) +#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159) struct hns_roce_wqe_frmr_seg { __le32 pbl_size; @@ -1033,7 +966,10 @@ struct hns_roce_func_clear { __le32 rsv[4]; }; -#define FUNC_CLEAR_RST_FUN_DONE_S 0 +#define FUNC_CLEAR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_func_clear, h, l) + +#define FUNC_CLEAR_RST_FUN_DONE FUNC_CLEAR_FIELD_LOC(32, 32) + /* Each physical function manages up to 248 virtual functions, it takes up to * 100ms for each function to execute clear. If an abnormal reset occurs, it is * executed twice at most, so it takes up to 249 * 2 * 100ms. @@ -1112,12 +1048,12 @@ struct hns_roce_vf_switch { __le32 resv3; }; -#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3 -#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3) +#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l) -#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1 -#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2 -#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3 +#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35) +#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65) +#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66) +#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67) struct hns_roce_post_mbox { __le32 in_param_l; @@ -1180,11 +1116,10 @@ struct hns_roce_cfg_sgid_tb { __le32 vf_sgid_type_rsv; }; -#define CFG_SGID_TB_TABLE_IDX_S 0 -#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) +#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l) -#define CFG_SGID_TB_VF_SGID_TYPE_S 0 -#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0) +#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0) +#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160) struct hns_roce_cfg_smac_tb { __le32 tb_idx_rsv; @@ -1192,11 +1127,11 @@ struct hns_roce_cfg_smac_tb { __le32 vf_smac_h_rsv; __le32 rsv[3]; }; -#define CFG_SMAC_TB_IDX_S 0 -#define CFG_SMAC_TB_IDX_M GENMASK(7, 0) -#define CFG_SMAC_TB_VF_SMAC_H_S 0 -#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0) +#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l) + +#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0) +#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64) struct hns_roce_cfg_gmv_tb_a { __le32 vf_sgid_l; @@ -1207,16 +1142,11 @@ struct hns_roce_cfg_gmv_tb_a { __le32 resv; }; -#define CFG_GMV_TB_SGID_IDX_S 0 -#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0) - -#define CFG_GMV_TB_VF_SGID_TYPE_S 0 -#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0) +#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l) -#define CFG_GMV_TB_VF_VLAN_EN_S 2 - -#define CFG_GMV_TB_VF_VLAN_ID_S 16 -#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16) +#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128) +#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130) +#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144) struct hns_roce_cfg_gmv_tb_b { __le32 vf_smac_l; @@ -1225,8 +1155,10 @@ struct hns_roce_cfg_gmv_tb_b { __le32 resv[3]; }; -#define CFG_GMV_TB_SMAC_H_S 0 -#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0) +#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l) + +#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32) +#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64) #define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 struct hns_roce_query_pf_caps_a { @@ -1278,29 +1210,17 @@ struct hns_roce_query_pf_caps_c { __le16 rq_depth; }; -#define V2_QUERY_PF_CAPS_C_NUM_PDS_S 0 -#define V2_QUERY_PF_CAPS_C_NUM_PDS_M GENMASK(19, 0) +#define PF_CAPS_C_FIELD_LOC(h, l) \ + FIELD_LOC(struct hns_roce_query_pf_caps_c, h, l) -#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_S 20 -#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_M GENMASK(31, 20) - -#define V2_QUERY_PF_CAPS_C_NUM_CQS_S 0 -#define V2_QUERY_PF_CAPS_C_NUM_CQS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_C_MAX_GID_S 20 -#define V2_QUERY_PF_CAPS_C_MAX_GID_M GENMASK(28, 20) - -#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_S 0 -#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_M GENMASK(22, 0) - -#define V2_QUERY_PF_CAPS_C_NUM_MRWS_S 0 -#define V2_QUERY_PF_CAPS_C_NUM_MRWS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_C_NUM_QPS_S 0 -#define V2_QUERY_PF_CAPS_C_NUM_QPS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_C_MAX_ORD_S 20 -#define V2_QUERY_PF_CAPS_C_MAX_ORD_M GENMASK(27, 20) +#define PF_CAPS_C_NUM_PDS PF_CAPS_C_FIELD_LOC(19, 0) +#define PF_CAPS_C_CAP_FLAGS PF_CAPS_C_FIELD_LOC(31, 20) +#define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32) +#define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52) +#define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64) +#define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96) +#define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128) +#define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148) struct hns_roce_query_pf_caps_d { __le32 wq_hop_num_max_srqs; @@ -1311,20 +1231,26 @@ struct hns_roce_query_pf_caps_d { __le32 num_uars_rsv_pds; __le32 rsv_uars_rsv_qps; }; -#define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0 -#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20 -#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20) - -#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S 22 -#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M GENMASK(23, 22) - -#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S 24 -#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M GENMASK(25, 24) -#define V2_QUERY_PF_CAPS_D_CONG_TYPE_S 26 -#define V2_QUERY_PF_CAPS_D_CONG_TYPE_M GENMASK(29, 26) +#define PF_CAPS_D_FIELD_LOC(h, l) \ + FIELD_LOC(struct hns_roce_query_pf_caps_d, h, l) + +#define PF_CAPS_D_NUM_SRQS PF_CAPS_D_FIELD_LOC(19, 0) +#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20) +#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22) +#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24) +#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26) +#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64) +#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86) +#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96) +#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118) +#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120) +#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128) +#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148) +#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160) +#define PF_CAPS_D_RSV_UARS PF_CAPS_D_FIELD_LOC(187, 180) + +#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 struct hns_roce_congestion_algorithm { u8 alg_sel; @@ -1333,33 +1259,6 @@ struct hns_roce_congestion_algorithm { u8 wnd_mode_sel; }; -#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S 0 -#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M GENMASK(21, 0) - -#define V2_QUERY_PF_CAPS_D_NUM_CEQS_S 22 -#define V2_QUERY_PF_CAPS_D_NUM_CEQS_M GENMASK(31, 22) - -#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S 0 -#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M GENMASK(21, 0) - -#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S 22 -#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M GENMASK(23, 22) - -#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S 24 -#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M GENMASK(25, 24) - -#define V2_QUERY_PF_CAPS_D_RSV_PDS_S 0 -#define V2_QUERY_PF_CAPS_D_RSV_PDS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_D_NUM_UARS_S 20 -#define V2_QUERY_PF_CAPS_D_NUM_UARS_M GENMASK(27, 20) - -#define V2_QUERY_PF_CAPS_D_RSV_QPS_S 0 -#define V2_QUERY_PF_CAPS_D_RSV_QPS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_D_RSV_UARS_S 20 -#define V2_QUERY_PF_CAPS_D_RSV_UARS_M GENMASK(27, 20) - struct hns_roce_query_pf_caps_e { __le32 chunk_size_shift_rsv_mrws; __le32 rsv_cqs; @@ -1371,20 +1270,14 @@ struct hns_roce_query_pf_caps_e { __le16 aeq_period; }; -#define V2_QUERY_PF_CAPS_E_RSV_MRWS_S 0 -#define V2_QUERY_PF_CAPS_E_RSV_MRWS_M GENMASK(19, 0) +#define PF_CAPS_E_FIELD_LOC(h, l) \ + FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l) -#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S 20 -#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M GENMASK(31, 20) - -#define V2_QUERY_PF_CAPS_E_RSV_CQS_S 0 -#define V2_QUERY_PF_CAPS_E_RSV_CQS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_E_RSV_SRQS_S 0 -#define V2_QUERY_PF_CAPS_E_RSV_SRQS_M GENMASK(19, 0) - -#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_S 0 -#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_M GENMASK(19, 0) +#define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0) +#define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20) +#define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32) +#define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64) +#define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96) struct hns_roce_cmq_req { __le32 data[6]; @@ -1485,9 +1378,6 @@ struct hns_roce_dip { #define HNS_ROCE_EQ_INIT_CONS_IDX 0 #define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0 -#define HNS_ROCE_V2_CEQ_CEQE_OWNER_S 31 -#define HNS_ROCE_V2_AEQ_AEQE_OWNER_S 31 - #define HNS_ROCE_V2_COMP_EQE_NUM 0x1000 #define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000 @@ -1544,18 +1434,6 @@ struct hns_roce_eq_context { #define EQC_NEX_EQE_BA_H EQC_FIELD_LOC(339, 320) #define EQC_EQE_SIZE EQC_FIELD_LOC(341, 340) -#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0 -#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0) - -#define HNS_ROCE_V2_AEQE_EVENT_TYPE_S 0 -#define HNS_ROCE_V2_AEQE_EVENT_TYPE_M GENMASK(7, 0) - -#define HNS_ROCE_V2_AEQE_SUB_TYPE_S 8 -#define HNS_ROCE_V2_AEQE_SUB_TYPE_M GENMASK(15, 8) - -#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 -#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) - #define MAX_SERVICE_LEVEL 0x7 struct hns_roce_wqe_atomic_seg { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index f73ba619f375..c8af4ebd7cbd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -737,7 +737,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table, HEM_TYPE_CQC_TIMER, hr_dev->caps.cqc_timer_entry_sz, - hr_dev->caps.num_cqc_timer, 1); + hr_dev->caps.cqc_timer_bt_num, 1); if (ret) { dev_err(dev, "Failed to init CQC timer memory, aborting.\n"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index b389738d157f..867972c2a894 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -340,7 +340,6 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct hns_roce_mr *mr = to_hr_mr(ibmr); - int ret = 0; if (hr_dev->hw->dereg_mr) hr_dev->hw->dereg_mr(hr_dev); @@ -348,7 +347,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) hns_roce_mr_free(hr_dev, mr); kfree(mr); - return ret; + return 0; } struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d78373e10aab..48d3616a6d71 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -243,26 +243,6 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) return 0; } -enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) -{ - switch (state) { - case IB_QPS_RESET: - return HNS_ROCE_QP_STATE_RST; - case IB_QPS_INIT: - return HNS_ROCE_QP_STATE_INIT; - case IB_QPS_RTR: - return HNS_ROCE_QP_STATE_RTR; - case IB_QPS_RTS: - return HNS_ROCE_QP_STATE_RTS; - case IB_QPS_SQD: - return HNS_ROCE_QP_STATE_SQD; - case IB_QPS_ERR: - return HNS_ROCE_QP_STATE_ERR; - default: - return HNS_ROCE_QP_NUM_STATE; - } -} - static void add_qp_to_list(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_cq *send_cq, struct ib_cq *recv_cq) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 259444c0a630..24a154d64630 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -13,61 +13,40 @@ static int hns_roce_fill_cq(struct sk_buff *msg, struct hns_roce_v2_cq_context *context) { if (rdma_nl_put_driver_u32(msg, "state", - roce_get_field(context->byte_4_pg_ceqn, - V2_CQC_BYTE_4_ARM_ST_M, - V2_CQC_BYTE_4_ARM_ST_S))) + hr_reg_read(context, CQC_ARM_ST))) + goto err; if (rdma_nl_put_driver_u32(msg, "ceqn", - roce_get_field(context->byte_4_pg_ceqn, - V2_CQC_BYTE_4_CEQN_M, - V2_CQC_BYTE_4_CEQN_S))) + hr_reg_read(context, CQC_CEQN))) goto err; if (rdma_nl_put_driver_u32(msg, "cqn", - roce_get_field(context->byte_8_cqn, - V2_CQC_BYTE_8_CQN_M, - V2_CQC_BYTE_8_CQN_S))) + hr_reg_read(context, CQC_CQN))) goto err; if (rdma_nl_put_driver_u32(msg, "hopnum", - roce_get_field(context->byte_16_hop_addr, - V2_CQC_BYTE_16_CQE_HOP_NUM_M, - V2_CQC_BYTE_16_CQE_HOP_NUM_S))) + hr_reg_read(context, CQC_CQE_HOP_NUM))) goto err; - if (rdma_nl_put_driver_u32( - msg, "pi", - roce_get_field(context->byte_28_cq_pi, - V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M, - V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S))) + if (rdma_nl_put_driver_u32(msg, "pi", + hr_reg_read(context, CQC_CQ_PRODUCER_IDX))) goto err; - if (rdma_nl_put_driver_u32( - msg, "ci", - roce_get_field(context->byte_32_cq_ci, - V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M, - V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S))) + if (rdma_nl_put_driver_u32(msg, "ci", + hr_reg_read(context, CQC_CQ_CONSUMER_IDX))) goto err; - if (rdma_nl_put_driver_u32( - msg, "coalesce", - roce_get_field(context->byte_56_cqe_period_maxcnt, - V2_CQC_BYTE_56_CQ_MAX_CNT_M, - V2_CQC_BYTE_56_CQ_MAX_CNT_S))) + if (rdma_nl_put_driver_u32(msg, "coalesce", + hr_reg_read(context, CQC_CQ_MAX_CNT))) goto err; - if (rdma_nl_put_driver_u32( - msg, "period", - roce_get_field(context->byte_56_cqe_period_maxcnt, - V2_CQC_BYTE_56_CQ_PERIOD_M, - V2_CQC_BYTE_56_CQ_PERIOD_S))) + if (rdma_nl_put_driver_u32(msg, "period", + hr_reg_read(context, CQC_CQ_PERIOD))) goto err; if (rdma_nl_put_driver_u32(msg, "cnt", - roce_get_field(context->byte_52_cqe_cnt, - V2_CQC_BYTE_52_CQE_CNT_M, - V2_CQC_BYTE_52_CQE_CNT_S))) + hr_reg_read(context, CQC_CQE_CNT))) goto err; return 0; diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 3dc9b5801da1..dd3943d22dc6 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -61,7 +61,7 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq) struct irdma_cq *cq = iwcq->back_cq; if (!cq->user_mode) - cq->armed = false; + atomic_set(&cq->armed, 0); if (cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } @@ -1827,10 +1827,6 @@ int irdma_rt_init_hw(struct irdma_device *iwdev, rf->rsrc_created = true; } - iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_MEM_MGT_EXTENSIONS; - if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) irdma_alloc_set_mac(iwdev); irdma_add_ip(iwdev); @@ -2693,24 +2689,29 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask) info.sq = flush_mask & IRDMA_FLUSH_SQ; info.rq = flush_mask & IRDMA_FLUSH_RQ; - if (flush_mask & IRDMA_REFLUSH) { - if (info.sq) - iwqp->sc_qp.flush_sq = false; - if (info.rq) - iwqp->sc_qp.flush_rq = false; - } - /* Generate userflush errors in CQE */ info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR; info.sq_minor_code = FLUSH_GENERAL_ERR; info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR; info.rq_minor_code = FLUSH_GENERAL_ERR; info.userflushcode = true; - if (flush_code) { - if (info.sq && iwqp->sc_qp.sq_flush_code) - info.sq_minor_code = flush_code; - if (info.rq && iwqp->sc_qp.rq_flush_code) - info.rq_minor_code = flush_code; + + if (flush_mask & IRDMA_REFLUSH) { + if (info.sq) + iwqp->sc_qp.flush_sq = false; + if (info.rq) + iwqp->sc_qp.flush_rq = false; + } else { + if (flush_code) { + if (info.sq && iwqp->sc_qp.sq_flush_code) + info.sq_minor_code = flush_code; + if (info.rq && iwqp->sc_qp.rq_flush_code) + info.rq_minor_code = flush_code; + } + if (!iwqp->user_mode) + queue_delayed_work(iwqp->iwdev->cleanup_wq, + &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); } /* Issue flush */ diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 5123f5feaa2f..ef862bced20f 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -338,7 +338,6 @@ struct irdma_device { u32 roce_ackcreds; u32 vendor_id; u32 vendor_part_id; - u32 device_cap_flags; u32 push_mode; u32 rcv_wnd; u16 mac_ip_table_idx; diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c index 397f3d070f90..4ec9639f1bdb 100644 --- a/drivers/infiniband/hw/irdma/puda.c +++ b/drivers/infiniband/hw/irdma/puda.c @@ -191,7 +191,6 @@ static void irdma_puda_dele_buf(struct irdma_sc_dev *dev, static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) { - __le64 *wqe = NULL; int ret_code = 0; *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); @@ -199,11 +198,9 @@ static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp, qp->swqe_polarity = !qp->swqe_polarity; IRDMA_RING_MOVE_HEAD(qp->sq_ring, ret_code); if (ret_code) - return wqe; - - wqe = qp->sq_base[*wqe_idx].elem; + return NULL; - return wqe; + return qp->sq_base[*wqe_idx].elem; } /** diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 81760415d66c..ab3c5208a123 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2495,3 +2495,150 @@ bool irdma_cq_empty(struct irdma_cq *iwcq) return polarity != ukcq->polarity; } + +void irdma_remove_cmpls_list(struct irdma_cq *iwcq) +{ + struct irdma_cmpl_gen *cmpl_node; + struct list_head *tmp_node, *list_node; + + list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) { + cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list); + list_del(&cmpl_node->list); + kfree(cmpl_node); + } +} + +int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info) +{ + struct irdma_cmpl_gen *cmpl; + + if (list_empty(&iwcq->cmpl_generated)) + return -ENOENT; + cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list); + list_del(&cmpl->list); + memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info)); + kfree(cmpl); + + ibdev_dbg(iwcq->ibcq.device, + "VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n", + __func__, cq_poll_info->qp_id, cq_poll_info->op_type, + cq_poll_info->wr_id); + + return 0; +} + +/** + * irdma_set_cpi_common_values - fill in values for polling info struct + * @cpi: resulting structure of cq_poll_info type + * @qp: QPair + * @qp_num: id of the QP + */ +static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi, + struct irdma_qp_uk *qp, u32 qp_num) +{ + cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED; + cpi->error = true; + cpi->major_err = IRDMA_FLUSH_MAJOR_ERR; + cpi->minor_err = FLUSH_GENERAL_ERR; + cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp; + cpi->qp_id = qp_num; +} + +static inline void irdma_comp_handler(struct irdma_cq *cq) +{ + if (!cq->ibcq.comp_handler) + return; + if (atomic_cmpxchg(&cq->armed, 1, 0)) + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +void irdma_generate_flush_completions(struct irdma_qp *iwqp) +{ + struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk; + struct irdma_ring *sq_ring = &qp->sq_ring; + struct irdma_ring *rq_ring = &qp->rq_ring; + struct irdma_cmpl_gen *cmpl; + __le64 *sw_wqe; + u64 wqe_qword; + u32 wqe_idx; + bool compl_generated = false; + unsigned long flags1; + + spin_lock_irqsave(&iwqp->iwscq->lock, flags1); + if (irdma_cq_empty(iwqp->iwscq)) { + unsigned long flags2; + + spin_lock_irqsave(&iwqp->lock, flags2); + while (IRDMA_RING_MORE_WORK(*sq_ring)) { + cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); + if (!cmpl) { + spin_unlock_irqrestore(&iwqp->lock, flags2); + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); + return; + } + + wqe_idx = sq_ring->tail; + irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); + + cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; + sw_wqe = qp->sq_base[wqe_idx].elem; + get_64bit_val(sw_wqe, 24, &wqe_qword); + cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE); + /* remove the SQ WR by moving SQ tail*/ + IRDMA_RING_SET_TAIL(*sq_ring, + sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta); + + ibdev_dbg(iwqp->iwscq->ibcq.device, + "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n", + __func__, cmpl->cpi.wr_id, qp->qp_id); + list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated); + compl_generated = true; + } + spin_unlock_irqrestore(&iwqp->lock, flags2); + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); + if (compl_generated) + irdma_comp_handler(iwqp->iwrcq); + } else { + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); + } + + spin_lock_irqsave(&iwqp->iwrcq->lock, flags1); + if (irdma_cq_empty(iwqp->iwrcq)) { + unsigned long flags2; + + spin_lock_irqsave(&iwqp->lock, flags2); + while (IRDMA_RING_MORE_WORK(*rq_ring)) { + cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC); + if (!cmpl) { + spin_unlock_irqrestore(&iwqp->lock, flags2); + spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); + return; + } + + wqe_idx = rq_ring->tail; + irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); + + cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx]; + cmpl->cpi.op_type = IRDMA_OP_TYPE_REC; + /* remove the RQ WR by moving RQ tail */ + IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1); + ibdev_dbg(iwqp->iwrcq->ibcq.device, + "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n", + __func__, cmpl->cpi.wr_id, qp->qp_id, + wqe_idx); + list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated); + + compl_generated = true; + } + spin_unlock_irqrestore(&iwqp->lock, flags2); + spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); + if (compl_generated) + irdma_comp_handler(iwqp->iwrcq); + } else { + spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1); + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); + } +} diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 52f3e88f8569..c4412ece5a6d 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -25,7 +25,9 @@ static int irdma_query_device(struct ib_device *ibdev, iwdev->netdev->dev_addr); props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 | irdma_fw_minor_ver(&rf->sc_dev); - props->device_cap_flags = iwdev->device_cap_flags; + props->device_cap_flags = IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_MGT_EXTENSIONS; + props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; props->vendor_id = pcidev->vendor; props->vendor_part_id = pcidev->device; @@ -533,6 +535,9 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) irdma_modify_qp_to_err(&iwqp->sc_qp); + if (!iwqp->user_mode) + cancel_delayed_work_sync(&iwqp->dwork_flush); + irdma_qp_rem_ref(&iwqp->ibqp); wait_for_completion(&iwqp->free_qp); irdma_free_lsmm_rsrc(iwqp); @@ -788,6 +793,14 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, return 0; } +static void irdma_flush_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush); + + irdma_generate_flush_completions(iwqp); +} + /** * irdma_create_qp - create qp * @ibqp: ptr of qp @@ -907,6 +920,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; irdma_setup_virt_qp(iwdev, iwqp, &init_info); } else { + INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); } @@ -1398,11 +1412,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if (iwqp->ibqp_state > IB_QPS_RTS && !iwqp->flush_issued) { - iwqp->flush_issued = 1; spin_unlock_irqrestore(&iwqp->lock, flags); irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ | IRDMA_FLUSH_WAIT); + iwqp->flush_issued = 1; } else { spin_unlock_irqrestore(&iwqp->lock, flags); } @@ -1755,6 +1769,8 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) unsigned long flags; spin_lock_irqsave(&iwcq->lock, flags); + if (!list_empty(&iwcq->cmpl_generated)) + irdma_remove_cmpls_list(iwcq); if (!list_empty(&iwcq->resize_list)) irdma_process_resize_list(iwcq, iwdev, NULL); spin_unlock_irqrestore(&iwcq->lock, flags); @@ -1959,6 +1975,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, cq->back_cq = iwcq; spin_lock_init(&iwcq->lock); INIT_LIST_HEAD(&iwcq->resize_list); + INIT_LIST_HEAD(&iwcq->cmpl_generated); info.dev = dev; ukinfo->cq_size = max(entries, 4); ukinfo->cq_id = cq_num; @@ -3044,15 +3061,12 @@ static int irdma_post_send(struct ib_qp *ibqp, unsigned long flags; bool inv_stag; struct irdma_ah *ah; - bool reflush = false; iwqp = to_iwqp(ibqp); ukqp = &iwqp->sc_qp.qp_uk; dev = &iwqp->iwdev->rf->sc_dev; spin_lock_irqsave(&iwqp->lock, flags); - if (iwqp->flush_issued && ukqp->sq_flush_complete) - reflush = true; while (ib_wr) { memset(&info, 0, sizeof(info)); inv_stag = false; @@ -3202,15 +3216,14 @@ static int irdma_post_send(struct ib_qp *ibqp, ib_wr = ib_wr->next; } - if (!iwqp->flush_issued && iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) { - irdma_uk_qp_post_wr(ukqp); + if (!iwqp->flush_issued) { + if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) + irdma_uk_qp_post_wr(ukqp); spin_unlock_irqrestore(&iwqp->lock, flags); - } else if (reflush) { - ukqp->sq_flush_complete = false; - spin_unlock_irqrestore(&iwqp->lock, flags); - irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_REFLUSH); } else { spin_unlock_irqrestore(&iwqp->lock, flags); + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); } if (err) *bad_wr = ib_wr; @@ -3233,14 +3246,11 @@ static int irdma_post_recv(struct ib_qp *ibqp, struct irdma_post_rq_info post_recv = {}; unsigned long flags; int err = 0; - bool reflush = false; iwqp = to_iwqp(ibqp); ukqp = &iwqp->sc_qp.qp_uk; spin_lock_irqsave(&iwqp->lock, flags); - if (iwqp->flush_issued && ukqp->rq_flush_complete) - reflush = true; while (ib_wr) { post_recv.num_sges = ib_wr->num_sge; post_recv.wr_id = ib_wr->wr_id; @@ -3256,13 +3266,10 @@ static int irdma_post_recv(struct ib_qp *ibqp, } out: - if (reflush) { - ukqp->rq_flush_complete = false; - spin_unlock_irqrestore(&iwqp->lock, flags); - irdma_flush_wqes(iwqp, IRDMA_FLUSH_RQ | IRDMA_REFLUSH); - } else { - spin_unlock_irqrestore(&iwqp->lock, flags); - } + spin_unlock_irqrestore(&iwqp->lock, flags); + if (iwqp->flush_issued) + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, + msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)); if (err) *bad_wr = ib_wr; @@ -3474,6 +3481,11 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc /* check the current CQ for new cqes */ while (npolled < num_entries) { ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled); + if (ret == -ENOENT) { + ret = irdma_generated_cmpls(iwcq, cur_cqe); + if (!ret) + irdma_process_cqe(entry + npolled, cur_cqe); + } if (!ret) { ++npolled; cq_new_cqe = true; @@ -3555,13 +3567,13 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq, if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED) promo_event = true; - if (!iwcq->armed || promo_event) { - iwcq->armed = true; + if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) { iwcq->last_notify = cq_notify; irdma_uk_cq_request_notification(ukcq, cq_notify); } - if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq)) + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && + (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated))) ret = 1; spin_unlock_irqrestore(&iwcq->lock, flags); diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 08ba24d0b843..4309b7159f42 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -4,6 +4,7 @@ #define IRDMA_VERBS_H #define IRDMA_MAX_SAVED_PHY_PGADDR 4 +#define IRDMA_FLUSH_DELAY_MS 20 #define IRDMA_PKEY_TBL_SZ 1 #define IRDMA_DEFAULT_PKEY 0xFFFF @@ -115,7 +116,7 @@ struct irdma_cq { u16 cq_size; u16 cq_num; bool user_mode; - bool armed; + atomic_t armed; enum irdma_cmpl_notify last_notify; u32 polled_cmpls; u32 cq_mem_size; @@ -126,6 +127,12 @@ struct irdma_cq { struct irdma_pbl *iwpbl_shadow; struct list_head resize_list; struct irdma_cq_poll_info cur_cqe; + struct list_head cmpl_generated; +}; + +struct irdma_cmpl_gen { + struct list_head list; + struct irdma_cq_poll_info cpi; }; struct disconn_work { @@ -166,6 +173,7 @@ struct irdma_qp { refcount_t refcnt; struct iw_cm_id *cm_id; struct irdma_cm_node *cm_node; + struct delayed_work dwork_flush; struct ib_mr *lsmm_mr; atomic_t hw_mod_qp_pend; enum ib_qp_state ibqp_state; @@ -229,4 +237,7 @@ int irdma_ib_register_device(struct irdma_device *iwdev); void irdma_ib_unregister_device(struct irdma_device *iwdev); void irdma_ib_dealloc_device(struct ib_device *ibdev); void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event); +void irdma_generate_flush_completions(struct irdma_qp *iwqp); +void irdma_remove_cmpls_list(struct irdma_cq *iwcq); +int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info); #endif /* IRDMA_VERBS_H */ diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index 4aff1c8298b1..12b481d138cf 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -80,6 +80,7 @@ struct cm_req_msg { union ib_gid primary_path_sgid; }; +static struct workqueue_struct *cm_wq; static void set_local_comm_id(struct ib_mad *mad, u32 cm_id) { @@ -288,10 +289,10 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) /*make sure that there is no schedule inside the scheduled work.*/ if (!sriov->is_going_down && !id->scheduled_delete) { id->scheduled_delete = 1; - schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); + queue_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } else if (id->scheduled_delete) { /* Adjust timeout if already scheduled */ - mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); + mod_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } spin_unlock_irqrestore(&sriov->going_down_lock, flags); spin_unlock(&sriov->id_map_lock); @@ -370,7 +371,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl ret = xa_err(item); else /* If a retry, adjust delayed work */ - mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + mod_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); goto err_or_exists; } xa_unlock(&sriov->xa_rej_tmout); @@ -393,7 +394,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl return xa_err(old); } - schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + queue_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); return 0; @@ -500,7 +501,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave) xa_lock(&sriov->xa_rej_tmout); xa_for_each(&sriov->xa_rej_tmout, id, item) { if (slave < 0 || slave == item->slave) { - mod_delayed_work(system_wq, &item->timeout, 0); + mod_delayed_work(cm_wq, &item->timeout, 0); flush_needed = true; ++cnt; } @@ -508,7 +509,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave) xa_unlock(&sriov->xa_rej_tmout); if (flush_needed) { - flush_scheduled_work(); + flush_workqueue(cm_wq); pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n", cnt, slave); } @@ -540,7 +541,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) spin_unlock(&sriov->id_map_lock); if (need_flush) - flush_scheduled_work(); /* make sure all timers were flushed */ + flush_workqueue(cm_wq); /* make sure all timers were flushed */ /* now, remove all leftover entries from databases*/ spin_lock(&sriov->id_map_lock); @@ -587,3 +588,17 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) rej_tmout_xa_cleanup(sriov, slave); } + +int mlx4_ib_cm_init(void) +{ + cm_wq = alloc_workqueue("mlx4_ib_cm", 0, 0); + if (!cm_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_ib_cm_destroy(void) +{ + destroy_workqueue(cm_wq); +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 93b1650eacfa..ba47874f90d3 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -479,8 +479,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + IB_DEVICE_RC_RNR_NAK_GEN; + props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) @@ -494,9 +494,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, if (dev->dev->caps.max_gso_sz && (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) && (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)) - props->device_cap_flags |= IB_DEVICE_UD_TSO; + props->kernel_cap_flags |= IBK_UD_TSO; if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) - props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY; if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) @@ -3307,10 +3307,14 @@ static int __init mlx4_ib_init(void) if (!wq) return -ENOMEM; - err = mlx4_ib_mcg_init(); + err = mlx4_ib_cm_init(); if (err) goto clean_wq; + err = mlx4_ib_mcg_init(); + if (err) + goto clean_cm; + err = mlx4_register_interface(&mlx4_ib_interface); if (err) goto clean_mcg; @@ -3320,6 +3324,9 @@ static int __init mlx4_ib_init(void) clean_mcg: mlx4_ib_mcg_destroy(); +clean_cm: + mlx4_ib_cm_destroy(); + clean_wq: destroy_workqueue(wq); return err; @@ -3329,6 +3336,7 @@ static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); mlx4_ib_mcg_destroy(); + mlx4_ib_cm_destroy(); destroy_workqueue(wq); } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index d84023b4b1b8..6a3b0f121045 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -937,4 +937,7 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table) int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts); +int mlx4_ib_cm_init(void); +void mlx4_ib_cm_destroy(void); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index f43380106bd0..612ee8190a2d 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -19,6 +19,7 @@ mlx5_ib-y := ah.o \ restrack.o \ srq.o \ srq_cmd.o \ + umr.o \ wr.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 9c2886bc72cb..39ffb363ba0c 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -1095,11 +1095,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - if (is_egress) { - err = -EINVAL; - goto free; - } - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { struct mlx5_ib_mcounters *mcounters; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 61a3b767262f..b68fddeac0f1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -41,6 +41,7 @@ #include "wr.h" #include "restrack.h" #include "counters.h" +#include "umr.h" #include <rdma/uverbs_std_types.h> #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_verbs.h> @@ -854,13 +855,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_DEVICE_MEM_WINDOW_TYPE_2B; props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); /* We support 'Gappy' memory registration too */ - props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; + props->kernel_cap_flags |= IBK_SG_GAPS_REG; } /* IB_WR_REG_MR always requires changing the entity size with UMR */ if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { - props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; + props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | IB_PROT_T10DIF_TYPE_2 | @@ -869,7 +870,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, IB_GUARD_T10DIF_CSUM; } if (MLX5_CAP_GEN(mdev, block_lb_mc)) - props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK; if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) { if (MLX5_CAP_ETH(mdev, csum_cap)) { @@ -916,7 +917,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; - props->device_cap_flags |= IB_DEVICE_UD_TSO; + props->kernel_cap_flags |= IBK_UD_TSO; } if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) && @@ -992,7 +993,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) - props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; + props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; if (!uhw) { /* ODP for kernel QPs is not implemented for receive @@ -1013,11 +1014,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (MLX5_CAP_GEN(mdev, cd)) - props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; - if (mlx5_core_is_vf(mdev)) - props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION; if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET && raw_support) { @@ -4008,12 +4006,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) if (err) mlx5_ib_warn(dev, "mr cache cleanup failed\n"); - if (dev->umrc.qp) - ib_destroy_qp(dev->umrc.qp); - if (dev->umrc.cq) - ib_free_cq(dev->umrc.cq); - if (dev->umrc.pd) - ib_dealloc_pd(dev->umrc.pd); + mlx5r_umr_resource_cleanup(dev); } static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) @@ -4021,112 +4014,19 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) ib_unregister_device(&dev->ib_dev); } -enum { - MAX_UMR_WR = 128, -}; - static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) { - struct ib_qp_init_attr *init_attr = NULL; - struct ib_qp_attr *attr = NULL; - struct ib_pd *pd; - struct ib_cq *cq; - struct ib_qp *qp; int ret; - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); - if (!attr || !init_attr) { - ret = -ENOMEM; - goto error_0; - } - - pd = ib_alloc_pd(&dev->ib_dev, 0); - if (IS_ERR(pd)) { - mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); - ret = PTR_ERR(pd); - goto error_0; - } - - cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); - if (IS_ERR(cq)) { - mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); - ret = PTR_ERR(cq); - goto error_2; - } - - init_attr->send_cq = cq; - init_attr->recv_cq = cq; - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; - init_attr->cap.max_send_wr = MAX_UMR_WR; - init_attr->cap.max_send_sge = 1; - init_attr->qp_type = MLX5_IB_QPT_REG_UMR; - init_attr->port_num = 1; - qp = ib_create_qp(pd, init_attr); - if (IS_ERR(qp)) { - mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); - ret = PTR_ERR(qp); - goto error_3; - } - - attr->qp_state = IB_QPS_INIT; - attr->port_num = 1; - ret = ib_modify_qp(qp, attr, - IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTR; - attr->path_mtu = IB_MTU_256; - - ret = ib_modify_qp(qp, attr, IB_QP_STATE); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); - goto error_4; - } - - memset(attr, 0, sizeof(*attr)); - attr->qp_state = IB_QPS_RTS; - ret = ib_modify_qp(qp, attr, IB_QP_STATE); - if (ret) { - mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); - goto error_4; - } - - dev->umrc.qp = qp; - dev->umrc.cq = cq; - dev->umrc.pd = pd; + ret = mlx5r_umr_resource_init(dev); + if (ret) + return ret; - sema_init(&dev->umrc.sem, MAX_UMR_WR); ret = mlx5_mr_cache_init(dev); if (ret) { mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); - goto error_4; + mlx5r_umr_resource_cleanup(dev); } - - kfree(attr); - kfree(init_attr); - - return 0; - -error_4: - ib_destroy_qp(qp); - dev->umrc.qp = NULL; - -error_3: - ib_free_cq(cq); - dev->umrc.cq = NULL; - -error_2: - ib_dealloc_pd(pd); - dev->umrc.pd = NULL; - -error_0: - kfree(attr); - kfree(init_attr); return ret; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 8b3c83c0b70a..998b67509a53 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -291,16 +291,9 @@ struct mlx5_ib_flow_db { }; /* Use macros here so that don't have to duplicate - * enum ib_send_flags and enum ib_qp_type for low-level driver + * enum ib_qp_type for low-level driver */ -#define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0) -#define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1) -#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2) -#define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3) -#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4) -#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END - #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 /* * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI @@ -311,9 +304,6 @@ struct mlx5_ib_flow_db { #define MLX5_IB_QPT_DCT IB_QPT_RESERVED4 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 -#define MLX5_IB_UMR_OCTOWORD 16 -#define MLX5_IB_UMR_XLT_ALIGNMENT 64 - #define MLX5_IB_UPD_XLT_ZAP BIT(0) #define MLX5_IB_UPD_XLT_ENABLE BIT(1) #define MLX5_IB_UPD_XLT_ATOMIC BIT(2) @@ -539,24 +529,6 @@ struct mlx5_ib_cq_buf { int nent; }; -struct mlx5_umr_wr { - struct ib_send_wr wr; - u64 virt_addr; - u64 offset; - struct ib_pd *pd; - unsigned int page_shift; - unsigned int xlt_size; - u64 length; - int access_flags; - u32 mkey; - u8 ignore_free_state:1; -}; - -static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr) -{ - return container_of(wr, struct mlx5_umr_wr, wr); -} - enum mlx5_ib_cq_pr_flags { MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0, MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS = 1 << 1, @@ -1291,9 +1263,6 @@ int mlx5_ib_advise_mr(struct ib_pd *pd, struct uverbs_attr_bundle *attrs); int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); -int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, - int page_shift, int flags); -int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); @@ -1472,9 +1441,6 @@ static inline int is_qp1(enum ib_qp_type qp_type) return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI; } -#define MLX5_MAX_UMR_SHIFT 16 -#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) - static inline u32 check_cq_create_flags(u32 flags) { /* @@ -1546,59 +1512,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); -static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev, - size_t length) -{ - /* - * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is - * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka - * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey - * can never be enabled without this capability. Simplify this weird - * quirky hardware by just saying it can't use PAS lists with UMR at - * all. - */ - if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) - return false; - - /* - * length is the size of the MR in bytes when mlx5_ib_update_xlt() is - * used. - */ - if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && - length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE) - return false; - return true; -} - -/* - * true if an existing MR can be reconfigured to new access_flags using UMR. - * Older HW cannot use UMR to update certain elements of the MKC. See - * umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask() - */ -static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, - unsigned int current_access_flags, - unsigned int target_access_flags) -{ - unsigned int diffs = current_access_flags ^ target_access_flags; - - if ((diffs & IB_ACCESS_REMOTE_ATOMIC) && - MLX5_CAP_GEN(dev->mdev, atomic) && - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) - return false; - - if ((diffs & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) - return false; - - if ((diffs & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) - return false; - - return true; -} - static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mkey *mmkey) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 32ef67e9a6a7..1e7653c997b5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -44,13 +44,7 @@ #include <rdma/ib_verbs.h> #include "dm.h" #include "mlx5_ib.h" - -/* - * We can't use an array for xlt_emergency_page because dma_map_single doesn't - * work on kernel modules memory - */ -void *xlt_emergency_page; -static DEFINE_MUTEX(xlt_emergency_page_mutex); +#include "umr.h" enum { MAX_PENDING_REG_MR = 8, @@ -128,11 +122,6 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, static int mr_cache_max_order(struct mlx5_ib_dev *dev); static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); -static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) -{ - return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); -} - static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); @@ -600,7 +589,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr; /* Matches access in alloc_cache_mr() */ - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) + if (!mlx5r_umr_can_reconfig(dev, 0, access_flags)) return ERR_PTR(-EOPNOTSUPP); spin_lock_irq(&ent->lock); @@ -741,7 +730,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && !dev->is_rep && mlx5_core_is_pf(dev->mdev) && - mlx5_ib_can_load_pas_with_umr(dev, 0)) + mlx5r_umr_can_load_pas(dev, 0)) ent->limit = dev->mdev->profile.mr_cache[i].limit; else ent->limit = 0; @@ -848,49 +837,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev) return MLX5_MAX_UMR_SHIFT; } -static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct mlx5_ib_umr_context *context = - container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); - - context->status = wc->status; - complete(&context->done); -} - -static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) -{ - context->cqe.done = mlx5_ib_umr_done; - context->status = -1; - init_completion(&context->done); -} - -static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, - struct mlx5_umr_wr *umrwr) -{ - struct umr_common *umrc = &dev->umrc; - const struct ib_send_wr *bad; - int err; - struct mlx5_ib_umr_context umr_context; - - mlx5_ib_init_umr_context(&umr_context); - umrwr->wr.wr_cqe = &umr_context.cqe; - - down(&umrc->sem); - err = ib_post_send(umrc->qp, &umrwr->wr, &bad); - if (err) { - mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); - } else { - wait_for_completion(&umr_context.done); - if (umr_context.status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "reg umr failed (%u)\n", - umr_context.status); - err = -EFAULT; - } - } - up(&umrc->sem); - return err; -} - static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, unsigned int order) { @@ -949,7 +895,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, * cache then synchronously create an uncached one. */ if (!ent || ent->limit == 0 || - !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { + !mlx5r_umr_can_reconfig(dev, 0, access_flags)) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(pd, umem, iova, access_flags, page_size, false); mutex_unlock(&dev->slow_path_mutex); @@ -968,289 +914,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, return mr; } -#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ - MLX5_UMR_MTT_ALIGNMENT) -#define MLX5_SPARE_UMR_CHUNK 0x10000 - -/* - * Allocate a temporary buffer to hold the per-page information to transfer to - * HW. For efficiency this should be as large as it can be, but buffer - * allocation failure is not allowed, so try smaller sizes. - */ -static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) -{ - const size_t xlt_chunk_align = - MLX5_UMR_MTT_ALIGNMENT / ent_size; - size_t size; - void *res = NULL; - - static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); - - /* - * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the - * allocation can't trigger any kind of reclaim. - */ - might_sleep(); - - gfp_mask |= __GFP_ZERO | __GFP_NORETRY; - - /* - * If the system already has a suitable high order page then just use - * that, but don't try hard to create one. This max is about 1M, so a - * free x86 huge page will satisfy it. - */ - size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), - MLX5_MAX_UMR_CHUNK); - *nents = size / ent_size; - res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, - get_order(size)); - if (res) - return res; - - if (size > MLX5_SPARE_UMR_CHUNK) { - size = MLX5_SPARE_UMR_CHUNK; - *nents = size / ent_size; - res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, - get_order(size)); - if (res) - return res; - } - - *nents = PAGE_SIZE / ent_size; - res = (void *)__get_free_page(gfp_mask); - if (res) - return res; - - mutex_lock(&xlt_emergency_page_mutex); - memset(xlt_emergency_page, 0, PAGE_SIZE); - return xlt_emergency_page; -} - -static void mlx5_ib_free_xlt(void *xlt, size_t length) -{ - if (xlt == xlt_emergency_page) { - mutex_unlock(&xlt_emergency_page_mutex); - return; - } - - free_pages((unsigned long)xlt, get_order(length)); -} - -/* - * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for - * submission. - */ -static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, - struct mlx5_umr_wr *wr, struct ib_sge *sg, - size_t nents, size_t ent_size, - unsigned int flags) -{ - struct mlx5_ib_dev *dev = mr_to_mdev(mr); - struct device *ddev = &dev->mdev->pdev->dev; - dma_addr_t dma; - void *xlt; - - xlt = mlx5_ib_alloc_xlt(&nents, ent_size, - flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : - GFP_KERNEL); - sg->length = nents * ent_size; - dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); - if (dma_mapping_error(ddev, dma)) { - mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); - mlx5_ib_free_xlt(xlt, sg->length); - return NULL; - } - sg->addr = dma; - sg->lkey = dev->umrc.pd->local_dma_lkey; - - memset(wr, 0, sizeof(*wr)); - wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; - if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) - wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; - wr->wr.sg_list = sg; - wr->wr.num_sge = 1; - wr->wr.opcode = MLX5_IB_WR_UMR; - wr->pd = mr->ibmr.pd; - wr->mkey = mr->mmkey.key; - wr->length = mr->ibmr.length; - wr->virt_addr = mr->ibmr.iova; - wr->access_flags = mr->access_flags; - wr->page_shift = mr->page_shift; - wr->xlt_size = sg->length; - return xlt; -} - -static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, - struct ib_sge *sg) -{ - struct device *ddev = &dev->mdev->pdev->dev; - - dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); - mlx5_ib_free_xlt(xlt, sg->length); -} - -static unsigned int xlt_wr_final_send_flags(unsigned int flags) -{ - unsigned int res = 0; - - if (flags & MLX5_IB_UPD_XLT_ENABLE) - res |= MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) - res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - return res; -} - -int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, - int page_shift, int flags) -{ - struct mlx5_ib_dev *dev = mr_to_mdev(mr); - struct device *ddev = &dev->mdev->pdev->dev; - void *xlt; - struct mlx5_umr_wr wr; - struct ib_sge sg; - int err = 0; - int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) - ? sizeof(struct mlx5_klm) - : sizeof(struct mlx5_mtt); - const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; - const int page_mask = page_align - 1; - size_t pages_mapped = 0; - size_t pages_to_map = 0; - size_t pages_iter; - size_t size_to_map = 0; - size_t orig_sg_length; - - if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && - !umr_can_use_indirect_mkey(dev)) - return -EPERM; - - if (WARN_ON(!mr->umem->is_odp)) - return -EINVAL; - - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, - * so we need to align the offset and length accordingly - */ - if (idx & page_mask) { - npages += idx & page_mask; - idx &= ~page_mask; - } - pages_to_map = ALIGN(npages, page_align); - - xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); - if (!xlt) - return -ENOMEM; - pages_iter = sg.length / desc_size; - orig_sg_length = sg.length; - - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } - - wr.page_shift = page_shift; - - for (pages_mapped = 0; - pages_mapped < pages_to_map && !err; - pages_mapped += pages_iter, idx += pages_iter) { - npages = min_t(int, pages_iter, pages_to_map - pages_mapped); - size_to_map = npages * desc_size; - dma_sync_single_for_cpu(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - dma_sync_single_for_device(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - - sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - - if (pages_mapped + pages_iter >= pages_to_map) - wr.wr.send_flags |= xlt_wr_final_send_flags(flags); - - wr.offset = idx * desc_size; - wr.xlt_size = sg.length; - - err = mlx5_ib_post_send_wait(dev, &wr); - } - sg.length = orig_sg_length; - mlx5_ib_unmap_free_xlt(dev, xlt, &sg); - return err; -} - -/* - * Send the DMA list to the HW for a normal MR using UMR. - * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP - * flag may be used. - */ -int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) -{ - struct mlx5_ib_dev *dev = mr_to_mdev(mr); - struct device *ddev = &dev->mdev->pdev->dev; - struct ib_block_iter biter; - struct mlx5_mtt *cur_mtt; - struct mlx5_umr_wr wr; - size_t orig_sg_length; - struct mlx5_mtt *mtt; - size_t final_size; - struct ib_sge sg; - int err = 0; - - if (WARN_ON(mr->umem->is_odp)) - return -EINVAL; - - mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, - ib_umem_num_dma_blocks(mr->umem, - 1 << mr->page_shift), - sizeof(*mtt), flags); - if (!mtt) - return -ENOMEM; - orig_sg_length = sg.length; - - cur_mtt = mtt; - rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter, - mr->umem->sgt_append.sgt.nents, - BIT(mr->page_shift)) { - if (cur_mtt == (void *)mtt + sg.length) { - dma_sync_single_for_device(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - err = mlx5_ib_post_send_wait(dev, &wr); - if (err) - goto err; - dma_sync_single_for_cpu(ddev, sg.addr, sg.length, - DMA_TO_DEVICE); - wr.offset += sg.length; - cur_mtt = mtt; - } - - cur_mtt->ptag = - cpu_to_be64(rdma_block_iter_dma_address(&biter) | - MLX5_IB_MTT_PRESENT); - - if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) - cur_mtt->ptag = 0; - - cur_mtt++; - } - - final_size = (void *)cur_mtt - (void *)mtt; - sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); - memset(cur_mtt, 0, sg.length - final_size); - wr.wr.send_flags |= xlt_wr_final_send_flags(flags); - wr.xlt_size = sg.length; - - dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); - err = mlx5_ib_post_send_wait(dev, &wr); - -err: - sg.length = orig_sg_length; - mlx5_ib_unmap_free_xlt(dev, mtt, &sg); - return err; -} - /* * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. @@ -1441,7 +1104,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, bool xlt_with_umr; int err; - xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); + xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length); if (xlt_with_umr) { mr = alloc_cacheable_mr(pd, umem, iova, access_flags); } else { @@ -1467,7 +1130,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, * configured properly but left disabled. It is safe to go ahead * and configure it again via UMR while enabling it. */ - err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); + err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); if (err) { mlx5_ib_dereg_mr(&mr->ibmr, NULL); return ERR_PTR(err); @@ -1504,7 +1167,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, } /* ODP requires xlt update via umr to work. */ - if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + if (!mlx5r_umr_can_load_pas(dev, length)) return ERR_PTR(-EINVAL); odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, @@ -1566,7 +1229,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) if (!umem_dmabuf->sgt) return; - mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); + mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); ib_umem_dmabuf_unmap_pages(umem_dmabuf); } @@ -1594,7 +1257,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, offset, virt_addr, length, fd, access_flags); /* dmabuf requires xlt update via umr to work. */ - if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + if (!mlx5r_umr_can_load_pas(dev, length)) return ERR_PTR(-EINVAL); umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, @@ -1631,31 +1294,6 @@ err_dereg_mr: return ERR_PTR(err); } -/** - * revoke_mr - Fence all DMA on the MR - * @mr: The MR to fence - * - * Upon return the NIC will not be doing any DMA to the pages under the MR, - * and any DMA in progress will be completed. Failure of this function - * indicates the HW has failed catastrophically. - */ -static int revoke_mr(struct mlx5_ib_mr *mr) -{ - struct mlx5_umr_wr umrwr = {}; - - if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - return 0; - - umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = mr_to_mdev(mr)->umrc.pd; - umrwr.mkey = mr->mmkey.key; - umrwr.ignore_free_state = 1; - - return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); -} - /* * True if the change in access flags can be done via UMR, only some access * flags can be updated. @@ -1669,32 +1307,8 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) return false; - return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, - target_access_flags); -} - -static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, - int access_flags) -{ - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_umr_wr umrwr = { - .wr = { - .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, - .opcode = MLX5_IB_WR_UMR, - }, - .mkey = mr->mmkey.key, - .pd = pd, - .access_flags = access_flags, - }; - int err; - - err = mlx5_ib_post_send_wait(dev, &umrwr); - if (err) - return err; - - mr->access_flags = access_flags; - return 0; + return mlx5r_umr_can_reconfig(dev, current_access_flags, + target_access_flags); } static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, @@ -1707,7 +1321,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, /* We only track the allocated sizes of MRs from the cache */ if (!mr->cache_ent) return false; - if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) + if (!mlx5r_umr_can_load_pas(dev, new_umem->length)) return false; *page_size = @@ -1732,7 +1346,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, * with it. This ensure the change is atomic relative to any use of the * MR. */ - err = revoke_mr(mr); + err = mlx5r_umr_revoke_mr(mr); if (err) return err; @@ -1750,7 +1364,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, mr->ibmr.length = new_umem->length; mr->page_shift = order_base_2(page_size); mr->umem = new_umem; - err = mlx5_ib_update_mr_pas(mr, upd_flags); + err = mlx5r_umr_update_mr_pas(mr, upd_flags); if (err) { /* * The MR is revoked at this point so there is no issue to free @@ -1797,7 +1411,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* Fast path for PD/access change */ if (can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { - err = umr_rereg_pd_access(mr, new_pd, new_access_flags); + err = mlx5r_umr_rereg_pd_access(mr, new_pd, + new_access_flags); if (err) return ERR_PTR(err); return NULL; @@ -1810,7 +1425,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * Only one active MR can refer to a umem at one time, revoke * the old MR before assigning the umem to the new one. */ - err = revoke_mr(mr); + err = mlx5r_umr_revoke_mr(mr); if (err) return ERR_PTR(err); umem = mr->umem; @@ -1955,7 +1570,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) /* Stop DMA */ if (mr->cache_ent) { - if (revoke_mr(mr)) { + if (mlx5r_umr_revoke_mr(mr)) { spin_lock_irq(&mr->cache_ent->lock); mr->cache_ent->total_mrs--; spin_unlock_irq(&mr->cache_ent->lock); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 41c964a45f89..84da5674e1ab 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -38,6 +38,7 @@ #include "mlx5_ib.h" #include "cmd.h" +#include "umr.h" #include "qp.h" #include <linux/mlx5/eq.h> @@ -117,7 +118,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * * xa_store() * mutex_lock(umem_mutex) - * mlx5_ib_update_xlt() + * mlx5r_umr_update_xlt() * mutex_unlock(umem_mutex) * destroy lkey * @@ -198,9 +199,9 @@ static void free_implicit_child_mr_work(struct work_struct *work) mlx5r_deref_wait_odp_mkey(&mr->mmkey); mutex_lock(&odp_imr->umem_mutex); - mlx5_ib_update_xlt(mr->parent, ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, - 1, 0, - MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mlx5r_umr_update_xlt(mr->parent, + ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0, + MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); mlx5_ib_dereg_mr(&mr->ibmr, NULL); @@ -282,19 +283,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, u64 umr_offset = idx & umr_block_mask; if (in_block && umr_offset == 0) { - mlx5_ib_update_xlt(mr, blk_start_idx, - idx - blk_start_idx, 0, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ATOMIC); + mlx5r_umr_update_xlt(mr, blk_start_idx, + idx - blk_start_idx, 0, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ATOMIC); in_block = 0; } } } if (in_block) - mlx5_ib_update_xlt(mr, blk_start_idx, - idx - blk_start_idx + 1, 0, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ATOMIC); + mlx5r_umr_update_xlt(mr, blk_start_idx, + idx - blk_start_idx + 1, 0, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ATOMIC); mlx5_update_odp_stats(mr, invalidations, invalidations); @@ -323,8 +324,7 @@ static void internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); - if (!MLX5_CAP_GEN(dev->mdev, pg) || - !mlx5_ib_can_load_pas_with_umr(dev, 0)) + if (!MLX5_CAP_GEN(dev->mdev, pg) || !mlx5r_umr_can_load_pas(dev, 0)) return; caps->general_caps = IB_ODP_SUPPORT; @@ -442,11 +442,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, */ refcount_set(&mr->mmkey.usecount, 2); - err = mlx5_ib_update_xlt(mr, 0, - MLX5_IMR_MTT_ENTRIES, - PAGE_SHIFT, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE); + err = mlx5r_umr_update_xlt(mr, 0, + MLX5_IMR_MTT_ENTRIES, + PAGE_SHIFT, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); if (err) { ret = ERR_PTR(err); goto out_mr; @@ -487,8 +487,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct mlx5_ib_mr *imr; int err; - if (!mlx5_ib_can_load_pas_with_umr(dev, - MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) + if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE)) return ERR_PTR(-EOPNOTSUPP); umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags); @@ -510,16 +509,15 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.lkey = imr->mmkey.key; imr->ibmr.rkey = imr->mmkey.key; imr->ibmr.device = &dev->ib_dev; - imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; xa_init(&imr->implicit_children); - err = mlx5_ib_update_xlt(imr, 0, - mlx5_imr_ksm_entries, - MLX5_KSM_PAGE_SHIFT, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE); + err = mlx5r_umr_update_xlt(imr, 0, + mlx5_imr_ksm_entries, + MLX5_KSM_PAGE_SHIFT, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); if (err) goto out_mr; @@ -582,7 +580,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, * No need to check whether the MTTs really belong to this MR, since * ib_umem_odp_map_dma_and_lock already checks this. */ - ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags); + ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags); mutex_unlock(&odp->umem_mutex); if (ret < 0) { @@ -680,9 +678,9 @@ out: * next pagefault handler will see the new information. */ mutex_lock(&odp_imr->umem_mutex); - err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ATOMIC); + err = mlx5r_umr_update_xlt(imr, upd_start_idx, upd_len, 0, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); if (err) { mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n"); @@ -716,7 +714,7 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, ib_umem_dmabuf_unmap_pages(umem_dmabuf); err = -EINVAL; } else { - err = mlx5_ib_update_mr_pas(mr, xlt_flags); + err = mlx5r_umr_update_mr_pas(mr, xlt_flags); } dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index fb8669c02546..40d9410ec303 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -40,6 +40,7 @@ #include "ib_rep.h" #include "counters.h" #include "cmd.h" +#include "umr.h" #include "qp.h" #include "wr.h" diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c new file mode 100644 index 000000000000..3a48364c0918 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -0,0 +1,700 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ + +#include <rdma/ib_umem_odp.h> +#include "mlx5_ib.h" +#include "umr.h" +#include "wr.h" + +/* + * We can't use an array for xlt_emergency_page because dma_map_single doesn't + * work on kernel modules memory + */ +void *xlt_emergency_page; +static DEFINE_MUTEX(xlt_emergency_page_mutex); + +static __be64 get_umr_enable_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_disable_mr_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_translation_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev) +{ + u64 result; + + result = MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW; + + if (MLX5_CAP_GEN(dev->mdev, atomic)) + result |= MLX5_MKEY_MASK_A; + + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; + + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; + + return cpu_to_be64(result); +} + +static __be64 get_umr_update_pd_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_PD; + + return cpu_to_be64(result); +} + +static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) +{ + if (mask & MLX5_MKEY_MASK_PAGE_SIZE && + MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_A && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return -EPERM; + + if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + return -EPERM; + + return 0; +} + +enum { + MAX_UMR_WR = 128, +}; + +static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp) +{ + struct ib_qp_attr attr = {}; + int ret; + + attr.qp_state = IB_QPS_INIT; + attr.port_num = 1; + ret = ib_modify_qp(qp, &attr, + IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + return ret; + } + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IB_QPS_RTR; + + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); + return ret; + } + + memset(&attr, 0, sizeof(attr)); + attr.qp_state = IB_QPS_RTS; + ret = ib_modify_qp(qp, &attr, IB_QP_STATE); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); + return ret; + } + + return 0; +} + +int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) +{ + struct ib_qp_init_attr init_attr = {}; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + int ret; + + pd = ib_alloc_pd(&dev->ib_dev, 0); + if (IS_ERR(pd)) { + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); + return PTR_ERR(pd); + } + + cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); + if (IS_ERR(cq)) { + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); + ret = PTR_ERR(cq); + goto destroy_pd; + } + + init_attr.send_cq = cq; + init_attr.recv_cq = cq; + init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr.cap.max_send_wr = MAX_UMR_WR; + init_attr.cap.max_send_sge = 1; + init_attr.qp_type = MLX5_IB_QPT_REG_UMR; + init_attr.port_num = 1; + qp = ib_create_qp(pd, &init_attr); + if (IS_ERR(qp)) { + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); + ret = PTR_ERR(qp); + goto destroy_cq; + } + + ret = mlx5r_umr_qp_rst2rts(dev, qp); + if (ret) + goto destroy_qp; + + dev->umrc.qp = qp; + dev->umrc.cq = cq; + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + + return 0; + +destroy_qp: + ib_destroy_qp(qp); +destroy_cq: + ib_free_cq(cq); +destroy_pd: + ib_dealloc_pd(pd); + return ret; +} + +void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) +{ + ib_destroy_qp(dev->umrc.qp); + ib_free_cq(dev->umrc.cq); + ib_dealloc_pd(dev->umrc.pd); +} + +static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, + struct mlx5r_umr_wqe *wqe, bool with_data) +{ + unsigned int wqe_size = + with_data ? sizeof(struct mlx5r_umr_wqe) : + sizeof(struct mlx5r_umr_wqe) - + sizeof(struct mlx5_wqe_data_seg); + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_ctrl_seg *ctrl; + union { + struct ib_cqe *ib_cqe; + u64 wr_id; + } id; + void *cur_edge, *seg; + unsigned long flags; + unsigned int idx; + int size, err; + + if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) + return -EIO; + + spin_lock_irqsave(&qp->sq.lock, flags); + + err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0, + cpu_to_be32(mkey), false, false); + if (WARN_ON(err)) + goto out; + + qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; + + mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size); + + id.ib_cqe = cqe; + mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0, + MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR); + + mlx5r_ring_db(qp, 1, ctrl); + +out: + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return err; +} + +static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct mlx5_ib_umr_context *context = + container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); + + context->status = wc->status; + complete(&context->done); +} + +static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context) +{ + context->cqe.done = mlx5r_umr_done; + init_completion(&context->done); +} + +static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, + struct mlx5r_umr_wqe *wqe, bool with_data) +{ + struct umr_common *umrc = &dev->umrc; + struct mlx5r_umr_context umr_context; + int err; + + err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask)); + if (WARN_ON(err)) + return err; + + mlx5r_umr_init_context(&umr_context); + + down(&umrc->sem); + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe, + with_data); + if (err) + mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); + else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed (%u)\n", + umr_context.status); + err = -EFAULT; + } + } + up(&umrc->sem); + return err; +} + +/** + * mlx5r_umr_revoke_mr - Fence all DMA on the MR + * @mr: The MR to fence + * + * Upon return the NIC will not be doing any DMA to the pages under the MR, + * and any DMA in progress will be completed. Failure of this function + * indicates the HW has failed catastrophically. + */ +int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct mlx5r_umr_wqe wqe = {}; + + if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + return 0; + + wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); + wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask(); + wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; + + MLX5_SET(mkc, &wqe.mkey_seg, free, 1); + MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn); + MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); + MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, + mlx5_mkey_variant(mr->mmkey.key)); + + return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); +} + +static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev, + struct mlx5_mkey_seg *seg, + unsigned int access_flags) +{ + MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, seg, lr, 1); + MLX5_SET(mkc, seg, relaxed_ordering_write, + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); + MLX5_SET(mkc, seg, relaxed_ordering_read, + !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); +} + +int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct mlx5r_umr_wqe wqe = {}; + int err; + + wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev); + wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); + wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE; + wqe.ctrl_seg.flags |= MLX5_UMR_INLINE; + + mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags); + MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff); + MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0, + mlx5_mkey_variant(mr->mmkey.key)); + + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false); + if (err) + return err; + + mr->access_flags = access_flags; + return 0; +} + +#define MLX5_MAX_UMR_CHUNK \ + ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT) +#define MLX5_SPARE_UMR_CHUNK 0x10000 + +/* + * Allocate a temporary buffer to hold the per-page information to transfer to + * HW. For efficiency this should be as large as it can be, but buffer + * allocation failure is not allowed, so try smaller sizes. + */ +static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) +{ + const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size; + size_t size; + void *res = NULL; + + static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); + + /* + * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the + * allocation can't trigger any kind of reclaim. + */ + might_sleep(); + + gfp_mask |= __GFP_ZERO | __GFP_NORETRY; + + /* + * If the system already has a suitable high order page then just use + * that, but don't try hard to create one. This max is about 1M, so a + * free x86 huge page will satisfy it. + */ + size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), + MLX5_MAX_UMR_CHUNK); + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + + if (size > MLX5_SPARE_UMR_CHUNK) { + size = MLX5_SPARE_UMR_CHUNK; + *nents = size / ent_size; + res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, + get_order(size)); + if (res) + return res; + } + + *nents = PAGE_SIZE / ent_size; + res = (void *)__get_free_page(gfp_mask); + if (res) + return res; + + mutex_lock(&xlt_emergency_page_mutex); + memset(xlt_emergency_page, 0, PAGE_SIZE); + return xlt_emergency_page; +} + +static void mlx5r_umr_free_xlt(void *xlt, size_t length) +{ + if (xlt == xlt_emergency_page) { + mutex_unlock(&xlt_emergency_page_mutex); + return; + } + + free_pages((unsigned long)xlt, get_order(length)); +} + +static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, + struct ib_sge *sg) +{ + struct device *ddev = &dev->mdev->pdev->dev; + + dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); + mlx5r_umr_free_xlt(xlt, sg->length); +} + +/* + * Create an XLT buffer ready for submission. + */ +static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg, + size_t nents, size_t ent_size, + unsigned int flags) +{ + struct device *ddev = &dev->mdev->pdev->dev; + dma_addr_t dma; + void *xlt; + + xlt = mlx5r_umr_alloc_xlt(&nents, ent_size, + flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : + GFP_KERNEL); + sg->length = nents * ent_size; + dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); + if (dma_mapping_error(ddev, dma)) { + mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); + mlx5r_umr_free_xlt(xlt, sg->length); + return NULL; + } + sg->addr = dma; + sg->lkey = dev->umrc.pd->local_dma_lkey; + + return xlt; +} + +static void +mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, + unsigned int flags, struct ib_sge *sg) +{ + if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) + /* fail if free */ + ctrl_seg->flags = MLX5_UMR_CHECK_FREE; + else + /* fail if not free */ + ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE; + ctrl_seg->xlt_octowords = + cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); +} + +static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev, + struct mlx5_mkey_seg *mkey_seg, + struct mlx5_ib_mr *mr, + unsigned int page_shift) +{ + mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags); + MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn); + MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova); + MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length); + MLX5_SET(mkc, mkey_seg, log_page_size, page_shift); + MLX5_SET(mkc, mkey_seg, qpn, 0xffffff); + MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key)); +} + +static void +mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg, + struct ib_sge *sg) +{ + data_seg->byte_count = cpu_to_be32(sg->length); + data_seg->lkey = cpu_to_be32(sg->lkey); + data_seg->addr = cpu_to_be64(sg->addr); +} + +static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg, + u64 offset) +{ + u64 octo_offset = mlx5r_umr_get_xlt_octo(offset); + + ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff); + ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16); + ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; +} + +static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev, + struct mlx5r_umr_wqe *wqe, + struct mlx5_ib_mr *mr, struct ib_sge *sg, + unsigned int flags) +{ + bool update_pd_access, update_translation; + + if (flags & MLX5_IB_UPD_XLT_ENABLE) + wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask(); + + update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE || + flags & MLX5_IB_UPD_XLT_PD || + flags & MLX5_IB_UPD_XLT_ACCESS; + + if (update_pd_access) { + wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev); + wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask(); + } + + update_translation = + flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR; + + if (update_translation) { + wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(); + if (!mr->ibmr.length) + MLX5_SET(mkc, &wqe->mkey_seg, length64, 1); + } + + wqe->ctrl_seg.xlt_octowords = + cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length)); + wqe->data_seg.byte_count = cpu_to_be32(sg->length); +} + +/* + * Send the DMA list to the HW for a normal MR using UMR. + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP + * flag may be used. + */ +int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + struct mlx5r_umr_wqe wqe = {}; + struct ib_block_iter biter; + struct mlx5_mtt *cur_mtt; + size_t orig_sg_length; + struct mlx5_mtt *mtt; + size_t final_size; + struct ib_sge sg; + u64 offset = 0; + int err = 0; + + if (WARN_ON(mr->umem->is_odp)) + return -EINVAL; + + mtt = mlx5r_umr_create_xlt( + dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift), + sizeof(*mtt), flags); + if (!mtt) + return -ENOMEM; + + orig_sg_length = sg.length; + + mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); + mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, + mr->page_shift); + mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); + + cur_mtt = mtt; + rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter, + mr->umem->sgt_append.sgt.nents, + BIT(mr->page_shift)) { + if (cur_mtt == (void *)mtt + sg.length) { + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, + true); + if (err) + goto err; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + offset += sg.length; + mlx5r_umr_update_offset(&wqe.ctrl_seg, offset); + + cur_mtt = mtt; + } + + cur_mtt->ptag = + cpu_to_be64(rdma_block_iter_dma_address(&biter) | + MLX5_IB_MTT_PRESENT); + + if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) + cur_mtt->ptag = 0; + + cur_mtt++; + } + + final_size = (void *)cur_mtt - (void *)mtt; + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + memset(cur_mtt, 0, sg.length - final_size); + mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); + + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); + +err: + sg.length = orig_sg_length; + mlx5r_umr_unmap_free_xlt(dev, mtt, &sg); + return err; +} + +static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) +{ + return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); +} + +int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, + int page_shift, int flags) +{ + int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) + ? sizeof(struct mlx5_klm) + : sizeof(struct mlx5_mtt); + const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; + struct mlx5_ib_dev *dev = mr_to_mdev(mr); + struct device *ddev = &dev->mdev->pdev->dev; + const int page_mask = page_align - 1; + struct mlx5r_umr_wqe wqe = {}; + size_t pages_mapped = 0; + size_t pages_to_map = 0; + size_t size_to_map = 0; + size_t orig_sg_length; + size_t pages_iter; + struct ib_sge sg; + int err = 0; + void *xlt; + + if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && + !umr_can_use_indirect_mkey(dev)) + return -EPERM; + + if (WARN_ON(!mr->umem->is_odp)) + return -EINVAL; + + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, + * so we need to align the offset and length accordingly + */ + if (idx & page_mask) { + npages += idx & page_mask; + idx &= ~page_mask; + } + pages_to_map = ALIGN(npages, page_align); + + xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags); + if (!xlt) + return -ENOMEM; + + pages_iter = sg.length / desc_size; + orig_sg_length = sg.length; + + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + + pages_to_map = min_t(size_t, pages_to_map, max_pages); + } + + mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg); + mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift); + mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg); + + for (pages_mapped = 0; + pages_mapped < pages_to_map && !err; + pages_mapped += pages_iter, idx += pages_iter) { + npages = min_t(int, pages_iter, pages_to_map - pages_mapped); + size_to_map = npages * desc_size; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); + + if (pages_mapped + pages_iter >= pages_to_map) + mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags); + mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size); + err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true); + } + sg.length = orig_sg_length; + mlx5r_umr_unmap_free_xlt(dev, xlt, &sg); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h new file mode 100644 index 000000000000..c9d0021381a2 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/umr.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef _MLX5_IB_UMR_H +#define _MLX5_IB_UMR_H + +#include "mlx5_ib.h" + + +#define MLX5_MAX_UMR_SHIFT 16 +#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) + +#define MLX5_IB_UMR_OCTOWORD 16 +#define MLX5_IB_UMR_XLT_ALIGNMENT 64 + +int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev); +void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev); + +static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev, + size_t length) +{ + /* + * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is + * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka + * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey + * can never be enabled without this capability. Simplify this weird + * quirky hardware by just saying it can't use PAS lists with UMR at + * all. + */ + if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + return false; + + /* + * length is the size of the MR in bytes when mlx5_ib_update_xlt() is + * used. + */ + if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && + length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE) + return false; + return true; +} + +/* + * true if an existing MR can be reconfigured to new access_flags using UMR. + * Older HW cannot use UMR to update certain elements of the MKC. See + * get_umr_update_access_mask() and umr_check_mkey_mask() + */ +static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) +{ + unsigned int diffs = current_access_flags ^ target_access_flags; + + if ((diffs & IB_ACCESS_REMOTE_ATOMIC) && + MLX5_CAP_GEN(dev->mdev, atomic) && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return false; + + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + return false; + + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + return false; + + return true; +} + +static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes) +{ + return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / + MLX5_IB_UMR_OCTOWORD; +} + +struct mlx5r_umr_context { + struct ib_cqe cqe; + enum ib_wc_status status; + struct completion done; +}; + +struct mlx5r_umr_wqe { + struct mlx5_wqe_umr_ctrl_seg ctrl_seg; + struct mlx5_mkey_seg mkey_seg; + struct mlx5_wqe_data_seg data_seg; +}; + +int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr); +int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, + int access_flags); +int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); +int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, + int page_shift, int flags); + +#endif /* _MLX5_IB_UMR_H */ diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 51e48ca9016e..855f3f4fefad 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -7,6 +7,7 @@ #include <linux/mlx5/qp.h> #include <linux/mlx5/driver.h> #include "wr.h" +#include "umr.h" static const u32 mlx5_ib_opcode[] = { [IB_WR_SEND] = MLX5_OPCODE_SEND, @@ -25,58 +26,7 @@ static const u32 mlx5_ib_opcode[] = { [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, }; -/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the - * next nearby edge and get new address translation for current WQE position. - * @sq - SQ buffer. - * @seg: Current WQE position (16B aligned). - * @wqe_sz: Total current WQE size [16B]. - * @cur_edge: Updated current edge. - */ -static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, - u32 wqe_sz, void **cur_edge) -{ - u32 idx; - - if (likely(*seg != *cur_edge)) - return; - - idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); - *cur_edge = get_sq_edge(sq, idx); - - *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); -} - -/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's - * pointers. At the end @seg is aligned to 16B regardless the copied size. - * @sq - SQ buffer. - * @cur_edge: Updated current edge. - * @seg: Current WQE position (16B aligned). - * @wqe_sz: Total current WQE size [16B]. - * @src: Pointer to copy from. - * @n: Number of bytes to copy. - */ -static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, - void **seg, u32 *wqe_sz, const void *src, - size_t n) -{ - while (likely(n)) { - size_t leftlen = *cur_edge - *seg; - size_t copysz = min_t(size_t, leftlen, n); - size_t stride; - - memcpy(*seg, src, copysz); - - n -= copysz; - src += copysz; - stride = !n ? ALIGN(copysz, 16) : copysz; - *seg += stride; - *wqe_sz += stride >> 4; - handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); - } -} - -static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, - struct ib_cq *ib_cq) +int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) { struct mlx5_ib_cq *cq; unsigned int cur; @@ -122,9 +72,9 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, eseg->mss = cpu_to_be16(ud_wr->mss); eseg->inline_hdr.sz = cpu_to_be16(left); - /* memcpy_send_wqe should get a 16B align address. Hence, we - * first copy up to the current edge and then, if needed, - * continue to memcpy_send_wqe. + /* mlx5r_memcpy_send_wqe should get a 16B align address. Hence, + * we first copy up to the current edge and then, if needed, + * continue to mlx5r_memcpy_send_wqe. */ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, left); @@ -138,8 +88,8 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, handle_post_send_edge(&qp->sq, seg, *size, cur_edge); left -= copysz; pdata += copysz; - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, - left); + mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, + pdata, left); } return; @@ -165,12 +115,6 @@ static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static u64 get_xlt_octo(u64 bytes) -{ - return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) / - MLX5_IB_UMR_OCTOWORD; -} - static __be64 frwr_mkey_mask(bool atomic) { u64 result; @@ -222,7 +166,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, memset(umr, 0, sizeof(*umr)); umr->flags = flags; - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); + umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size)); umr->mkey_mask = frwr_mkey_mask(atomic); } @@ -233,134 +177,6 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) umr->flags = MLX5_UMR_INLINE; } -static __be64 get_umr_enable_mr_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_KEY | - MLX5_MKEY_MASK_FREE; - - return cpu_to_be64(result); -} - -static __be64 get_umr_disable_mr_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_FREE; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_translation_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_LEN | - MLX5_MKEY_MASK_PAGE_SIZE | - MLX5_MKEY_MASK_START_ADDR; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_access_mask(int atomic, - int relaxed_ordering_write, - int relaxed_ordering_read) -{ - u64 result; - - result = MLX5_MKEY_MASK_LR | - MLX5_MKEY_MASK_LW | - MLX5_MKEY_MASK_RR | - MLX5_MKEY_MASK_RW; - - if (atomic) - result |= MLX5_MKEY_MASK_A; - - if (relaxed_ordering_write) - result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE; - - if (relaxed_ordering_read) - result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ; - - return cpu_to_be64(result); -} - -static __be64 get_umr_update_pd_mask(void) -{ - u64 result; - - result = MLX5_MKEY_MASK_PD; - - return cpu_to_be64(result); -} - -static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask) -{ - if (mask & MLX5_MKEY_MASK_PAGE_SIZE && - MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) - return -EPERM; - - if (mask & MLX5_MKEY_MASK_A && - MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) - return -EPERM; - - if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) - return -EPERM; - - if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) - return -EPERM; - - return 0; -} - -static int set_reg_umr_segment(struct mlx5_ib_dev *dev, - struct mlx5_wqe_umr_ctrl_seg *umr, - const struct ib_send_wr *wr) -{ - const struct mlx5_umr_wr *umrwr = umr_wr(wr); - - memset(umr, 0, sizeof(*umr)); - - if (!umrwr->ignore_free_state) { - if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE) - /* fail if free */ - umr->flags = MLX5_UMR_CHECK_FREE; - else - /* fail if not free */ - umr->flags = MLX5_UMR_CHECK_NOT_FREE; - } - - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size)); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) { - u64 offset = get_xlt_octo(umrwr->offset); - - umr->xlt_offset = cpu_to_be16(offset & 0xffff); - umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16); - umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN; - } - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION) - umr->mkey_mask |= get_umr_update_translation_mask(); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) { - umr->mkey_mask |= get_umr_update_access_mask( - !!(MLX5_CAP_GEN(dev->mdev, atomic)), - !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)), - !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))); - umr->mkey_mask |= get_umr_update_pd_mask(); - } - if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR) - umr->mkey_mask |= get_umr_enable_mr_mask(); - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - umr->mkey_mask |= get_umr_disable_mr_mask(); - - if (!wr->num_sge) - umr->flags |= MLX5_UMR_INLINE; - - return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask)); -} - static u8 get_umr_flags(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | @@ -398,43 +214,6 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) seg->status = MLX5_MKEY_STATUS_FREE; } -static void set_reg_mkey_segment(struct mlx5_ib_dev *dev, - struct mlx5_mkey_seg *seg, - const struct ib_send_wr *wr) -{ - const struct mlx5_umr_wr *umrwr = umr_wr(wr); - - memset(seg, 0, sizeof(*seg)); - if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) - MLX5_SET(mkc, seg, free, 1); - - MLX5_SET(mkc, seg, a, - !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, seg, rw, - !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, seg, lr, 1); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) - MLX5_SET(mkc, seg, relaxed_ordering_write, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) - MLX5_SET(mkc, seg, relaxed_ordering_read, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); - - if (umrwr->pd) - MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); - if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION && - !umrwr->length) - MLX5_SET(mkc, seg, length64, 1); - - MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr); - MLX5_SET64(mkc, seg, len, umrwr->length); - MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift); - MLX5_SET(mkc, seg, qpn, 0xffffff); - MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey)); -} - static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, struct mlx5_ib_mr *mr, struct mlx5_ib_pd *pd) @@ -760,7 +539,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); seg->len = cpu_to_be64(length); - seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size)); + seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size)); seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); } @@ -770,7 +549,7 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, memset(umr, 0, sizeof(*umr)); umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; - umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); + umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size)); umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); umr->mkey_mask = sig_mkey_mask(); } @@ -870,7 +649,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and * kernel ULPs are not aware of it, so we don't set it here. */ - if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) { + if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) { mlx5_ib_warn( to_mdev(qp->ibqp.device), "Fast update for MR access flags is not possible\n"); @@ -899,8 +678,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, handle_post_send_edge(&qp->sq, seg, *size, cur_edge); if (umr_inline) { - memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, - mr_list_size); + mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, + mr_list_size); *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); } else { set_reg_data_seg(*seg, mr, pd); @@ -942,23 +721,22 @@ static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) } } -static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned int *idx, - int *size, void **cur_edge, int nreq, - bool send_signaled, bool solicited) +int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx, + int *size, void **cur_edge, int nreq, __be32 general_id, + bool send_signaled, bool solicited) { - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) + if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); *ctrl = *seg; *(uint32_t *)(*seg + 8) = 0; - (*ctrl)->imm = send_ieth(wr); + (*ctrl)->general_id = general_id; (*ctrl)->fm_ce_se = qp->sq_signal_bits | - (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); + (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (solicited ? MLX5_WQE_CTRL_SOLICITED : 0); *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; @@ -972,16 +750,14 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, const struct ib_send_wr *wr, unsigned int *idx, int *size, void **cur_edge, int nreq) { - return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, - wr->send_flags & IB_SEND_SIGNALED, - wr->send_flags & IB_SEND_SOLICITED); + return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq, + send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED, + wr->send_flags & IB_SEND_SOLICITED); } -static void finish_wqe(struct mlx5_ib_qp *qp, - struct mlx5_wqe_ctrl_seg *ctrl, - void *seg, u8 size, void *cur_edge, - unsigned int idx, u64 wr_id, int nreq, u8 fence, - u32 mlx5_opcode) +void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, + void *seg, u8 size, void *cur_edge, unsigned int idx, + u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode) { u8 opmod = 0; @@ -1045,8 +821,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, /* * SET_PSV WQEs are not signaled and solicited on error. */ - err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, - false, true); + err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq, + send_ieth(wr), false, true); if (unlikely(err)) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -1057,8 +833,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, mlx5_ib_warn(dev, "\n"); goto out; } - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, - next_fence, MLX5_OPCODE_SET_PSV); + mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, + nreq, next_fence, MLX5_OPCODE_SET_PSV); out: return err; @@ -1098,8 +874,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, if (unlikely(err)) goto out; - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, - nreq, fence, MLX5_OPCODE_UMR); + mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, + wr->wr_id, nreq, fence, MLX5_OPCODE_UMR); err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq); if (unlikely(err)) { @@ -1130,8 +906,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev, mlx5_ib_warn(dev, "\n"); goto out; } - finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_UMR); + mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, + nreq, fence, MLX5_OPCODE_UMR); sig_attrs = mr->ibmr.sig_attrs; err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq, @@ -1246,33 +1022,30 @@ static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, } } -static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - const struct ib_send_wr *wr, - struct mlx5_wqe_ctrl_seg **ctrl, void **seg, - int *size, void **cur_edge, unsigned int idx) +void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq, + struct mlx5_wqe_ctrl_seg *ctrl) { - int err = 0; + struct mlx5_bf *bf = &qp->bf; - if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) { - err = -EINVAL; - mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode); - goto out; - } + qp->sq.head += nreq; - qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; - (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey); - err = set_reg_umr_segment(dev, *seg, wr); - if (unlikely(err)) - goto out; - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_reg_mkey_segment(dev, *seg, wr); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); -out: - return err; + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell. + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); + /* Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. + */ + bf->offset ^= bf->buf_size; } int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -1283,7 +1056,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_wqe_xrc_seg *xrc; - struct mlx5_bf *bf; void *cur_edge; int size; unsigned long flags; @@ -1305,8 +1077,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, if (qp->type == IB_QPT_GSI) return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); - bf = &qp->bf; - spin_lock_irqsave(&qp->sq.lock, flags); for (nreq = 0; wr; nreq++, wr = wr->next) { @@ -1384,12 +1154,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, case IB_QPT_UD: handle_qpt_ud(qp, wr, &seg, &size, &cur_edge); break; - case MLX5_IB_QPT_REG_UMR: - err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg, - &size, &cur_edge, idx); - if (unlikely(err)) - goto out; - break; default: break; @@ -1418,35 +1182,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, } qp->next_fence = next_fence; - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, - fence, mlx5_ib_opcode[wr->opcode]); + mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, + nreq, fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) dump_wqe(qp, idx, size); } out: - if (likely(nreq)) { - qp->sq.head += nreq; - - /* Make sure that descriptors are written before - * updating doorbell record and ringing the doorbell - */ - wmb(); - - qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); - - /* Make sure doorbell record is visible to the HCA before - * we hit doorbell. - */ - wmb(); - - mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); - /* Make sure doorbells don't leak out of SQ spinlock - * and reach the HCA out of order. - */ - bf->offset ^= bf->buf_size; - } + if (likely(nreq)) + mlx5r_ring_db(qp, nreq, ctrl); spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -1486,7 +1231,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; nreq++, wr = wr->next) { - if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { err = -ENOMEM; *bad_wr = wr; goto out; diff --git a/drivers/infiniband/hw/mlx5/wr.h b/drivers/infiniband/hw/mlx5/wr.h index 4f0057516402..2dc89438000d 100644 --- a/drivers/infiniband/hw/mlx5/wr.h +++ b/drivers/infiniband/hw/mlx5/wr.h @@ -41,6 +41,66 @@ static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) return fragment_end + MLX5_SEND_WQE_BB; } +/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the + * next nearby edge and get new address translation for current WQE position. + * @sq: SQ buffer. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @cur_edge: Updated current edge. + */ +static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + u32 idx; + + if (likely(*seg != *cur_edge)) + return; + + idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); + *cur_edge = get_sq_edge(sq, idx); + + *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); +} + +/* mlx5r_memcpy_send_wqe - copy data from src to WQE and update the relevant + * WQ's pointers. At the end @seg is aligned to 16B regardless the copied size. + * @sq: SQ buffer. + * @cur_edge: Updated current edge. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @src: Pointer to copy from. + * @n: Number of bytes to copy. + */ +static inline void mlx5r_memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, + void **seg, u32 *wqe_sz, + const void *src, size_t n) +{ + while (likely(n)) { + size_t leftlen = *cur_edge - *seg; + size_t copysz = min_t(size_t, leftlen, n); + size_t stride; + + memcpy(*seg, src, copysz); + + n -= copysz; + src += copysz; + stride = !n ? ALIGN(copysz, 16) : copysz; + *seg += stride; + *wqe_sz += stride >> 4; + handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); + } +} + +int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq); +int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx, + int *size, void **cur_edge, int nreq, __be32 general_id, + bool send_signaled, bool solicited); +void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, + void *seg, u8 size, void *cur_edge, unsigned int idx, + u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode); +void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq, + struct mlx5_wqe_ctrl_seg *ctrl); int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr, bool drain); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index acf9970ec245..dd4021b11963 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -90,8 +90,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; attr->max_send_sge = dev->attr.max_send_sge; attr->max_recv_sge = dev->attr.max_recv_sge; attr->max_sge_rd = dev->attr.max_rdma_sge; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 65ce6d0f1885..5152f10d2e6d 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -500,7 +500,6 @@ static void qedr_sync_free_irqs(struct qedr_dev *dev) if (dev->int_info.msix_cnt) { idx = i * dev->num_hwfns + dev->affin_hwfn_idx; vector = dev->int_info.msix[idx].vector; - synchronize_irq(vector); free_irq(vector, &dev->cnq_array[i]); } } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a53476653b0d..f0f43b6db89e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -134,7 +134,8 @@ int qedr_query_device(struct ib_device *ibdev, attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe); attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY; if (!rdma_protocol_iwarp(&dev->ibdev, 1)) attr->device_cap_flags |= IB_DEVICE_XRC; diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index a8e1c30c370f..b37b1c6d35c6 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -678,7 +678,7 @@ struct qib_pportdata { /* Observers. Not to be taken lightly, possibly not to ship. */ /* * If a diag read or write is to (bottom <= offset <= top), - * the "hoook" is called, allowing, e.g. shadows to be + * the "hook" is called, allowing, e.g. shadows to be * updated in sync with the driver. struct diag_observer * is the "visible" part. */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index d346dd48e731..46653ad56f5a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -534,6 +534,11 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, struct usnic_ib_vf *vf; enum usnic_vnic_res_type res_type; + if (!device_iommu_mapped(&pdev->dev)) { + usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n"); + return -EPERM; + } + vf = kzalloc(sizeof(*vf), GFP_KERNEL); if (!vf) return -ENOMEM; @@ -642,12 +647,6 @@ static int __init usnic_ib_init(void) printk_once(KERN_INFO "%s", usnic_version); - err = usnic_uiom_init(DRV_NAME); - if (err) { - usnic_err("Unable to initialize umem with err %d\n", err); - return err; - } - err = pci_register_driver(&usnic_ib_pci_driver); if (err) { usnic_err("Unable to register with PCI\n"); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index d3a9670bf971..6e8c4fbb8083 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -305,7 +305,8 @@ int usnic_ib_query_device(struct ib_device *ibdev, props->max_qp = qp_per_vf * kref_read(&us_ibdev->vf_cnt); props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | - IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + IB_DEVICE_SYS_IMAGE_GUID; + props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK; props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] * kref_read(&us_ibdev->vf_cnt); props->max_pd = USNIC_UIOM_MAX_PD_CNT; @@ -442,7 +443,7 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct usnic_ib_pd *pd = to_upd(ibpd); - pd->umem_pd = usnic_uiom_alloc_pd(); + pd->umem_pd = usnic_uiom_alloc_pd(ibpd->device->dev.parent); if (IS_ERR(pd->umem_pd)) return PTR_ERR(pd->umem_pd); @@ -706,4 +707,3 @@ int usnic_ib_mmap(struct ib_ucontext *context, usnic_err("No VF %u found\n", vfid); return -EINVAL; } - diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 760b254ba42d..e212929369df 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -40,7 +40,6 @@ #include <linux/iommu.h> #include <linux/workqueue.h> #include <linux/list.h> -#include <linux/pci.h> #include <rdma/ib_verbs.h> #include "usnic_log.h" @@ -439,7 +438,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr) __usnic_uiom_release_tail(uiomr); } -struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) +struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev) { struct usnic_uiom_pd *pd; void *domain; @@ -448,7 +447,7 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) if (!pd) return ERR_PTR(-ENOMEM); - pd->domain = domain = iommu_domain_alloc(&pci_bus_type); + pd->domain = domain = iommu_domain_alloc(dev->bus); if (!domain) { usnic_err("Failed to allocate IOMMU domain"); kfree(pd); @@ -556,13 +555,3 @@ void usnic_uiom_free_dev_list(struct device **devs) { kfree(devs); } - -int usnic_uiom_init(char *drv_name) -{ - if (!iommu_present(&pci_bus_type)) { - usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n"); - return -EPERM; - } - - return 0; -} diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 7ec8991ace67..5a9acf941510 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -80,7 +80,7 @@ struct usnic_uiom_chunk { struct scatterlist page_list[]; }; -struct usnic_uiom_pd *usnic_uiom_alloc_pd(void); +struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev); void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd); int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev); void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, @@ -91,5 +91,4 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, unsigned long addr, size_t size, int access, int dmasync); void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr); -int usnic_uiom_init(char *drv_name); #endif /* USNIC_UIOM_H_ */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 8ef112f883a7..3acab569fbb9 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2775,7 +2775,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi, EXPORT_SYMBOL(rvt_qp_iter); /* - * This should be called with s_lock held. + * This should be called with s_lock and r_lock held. */ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) @@ -3134,7 +3134,9 @@ send_comp: rvp->n_loop_pkts++; flush_send: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (local_ops) { atomic_dec(&sqp->local_ops_pending); local_ops = 0; @@ -3188,7 +3190,9 @@ serr: spin_unlock_irqrestore(&qp->r_lock, flags); serr_no_r_lock: spin_lock_irqsave(&sqp->s_lock, flags); + spin_lock(&sqp->r_lock); rvt_send_complete(sqp, wqe, send_status); + spin_unlock(&sqp->r_lock); if (sqp->ibqp.qp_type == IB_QPT_RC) { int lastwqe; diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 2dae7538a2ea..51daac5c4feb 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -46,6 +46,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe) rxe->attr.max_qp = RXE_MAX_QP; rxe->attr.max_qp_wr = RXE_MAX_QP_WR; rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; + rxe->attr.kernel_cap_flags = IBK_ALLOW_USER_UNREG; rxe->attr.max_send_sge = RXE_MAX_SGE; rxe->attr.max_recv_sge = RXE_MAX_SGE; rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 138b3e7d3a5f..da3a398053b8 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -562,7 +562,8 @@ int rxe_completer(void *arg) enum comp_state state; int ret = 0; - rxe_get(qp); + if (!rxe_get(qp)) + return -EAGAIN; if (!qp->valid || qp->req.state == QP_STATE_ERROR || qp->req.state == QP_STATE_RESET) { diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 2ffbe3390668..0e022ae1b8a5 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -37,7 +37,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); void rxe_cq_disable(struct rxe_cq *cq); -void rxe_cq_cleanup(struct rxe_pool_elem *arg); +void rxe_cq_cleanup(struct rxe_pool_elem *elem); /* rxe_mcast.c */ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid); @@ -81,7 +81,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey); int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr); int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); -void rxe_mr_cleanup(struct rxe_pool_elem *arg); +void rxe_mr_cleanup(struct rxe_pool_elem *elem); /* rxe_mw.c */ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata); @@ -89,7 +89,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw); int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe); int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey); struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey); -void rxe_mw_cleanup(struct rxe_pool_elem *arg); +void rxe_mw_cleanup(struct rxe_pool_elem *elem); /* rxe_net.c */ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, @@ -114,7 +114,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); void rxe_qp_error(struct rxe_qp *qp); int rxe_qp_chk_destroy(struct rxe_qp *qp); -void rxe_qp_destroy(struct rxe_qp *qp); void rxe_qp_cleanup(struct rxe_pool_elem *elem); static inline int qp_num(struct rxe_qp *qp) @@ -159,18 +158,16 @@ void retransmit_timer(struct timer_list *t); void rnr_nak_timer(struct timer_list *t); /* rxe_srq.c */ -#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT) - -int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); - +int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init); int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp); - +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask); int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata); +void rxe_srq_cleanup(struct rxe_pool_elem *elem); void rxe_dealloc(struct ib_device *ib_dev); diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index 873a9b10307c..86cc2e18a7fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -206,8 +206,10 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) /* speculative alloc of new mcg */ mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); - if (!mcg) - return ERR_PTR(-ENOMEM); + if (!mcg) { + err = -ENOMEM; + goto err_dec; + } spin_lock_bh(&rxe->mcg_lock); /* re-check to see if someone else just added it */ diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 60a31b718774..fc3942e04a1f 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -683,14 +683,10 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rxe_mr *mr = to_rmr(ibmr); - if (atomic_read(&mr->num_mw) > 0) { - pr_warn("%s: Attempt to deregister an MR while bound to MWs\n", - __func__); + /* See IBA 10.6.7.2.6 */ + if (atomic_read(&mr->num_mw) > 0) return -EINVAL; - } - mr->state = RXE_MR_STATE_INVALID; - rxe_put(mr_pd(mr)); rxe_put(mr); return 0; @@ -700,6 +696,8 @@ void rxe_mr_cleanup(struct rxe_pool_elem *elem) { struct rxe_mr *mr = container_of(elem, typeof(*mr), elem); + rxe_put(mr_pd(mr)); + ib_umem_release(mr->umem); if (mr->cur_map_set) diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index c86b2efd58f2..2e1fa844fabf 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -3,6 +3,14 @@ * Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved. */ +/* + * The rdma_rxe driver supports type 1 or type 2B memory windows. + * Type 1 MWs are created by ibv_alloc_mw() verbs calls and bound by + * ibv_bind_mw() calls. Type 2 MWs are also created by ibv_alloc_mw() + * but bound by bind_mw work requests. The ibv_bind_mw() call is converted + * by libibverbs to a bind_mw work request. + */ + #include "rxe.h" int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) @@ -28,40 +36,11 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) return 0; } -static void rxe_do_dealloc_mw(struct rxe_mw *mw) -{ - if (mw->mr) { - struct rxe_mr *mr = mw->mr; - - mw->mr = NULL; - atomic_dec(&mr->num_mw); - rxe_put(mr); - } - - if (mw->qp) { - struct rxe_qp *qp = mw->qp; - - mw->qp = NULL; - rxe_put(qp); - } - - mw->access = 0; - mw->addr = 0; - mw->length = 0; - mw->state = RXE_MW_STATE_INVALID; -} - int rxe_dealloc_mw(struct ib_mw *ibmw) { struct rxe_mw *mw = to_rmw(ibmw); - struct rxe_pd *pd = to_rpd(ibmw->pd); - - spin_lock_bh(&mw->lock); - rxe_do_dealloc_mw(mw); - spin_unlock_bh(&mw->lock); rxe_put(mw); - rxe_put(pd); return 0; } @@ -328,3 +307,31 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) return mw; } + +void rxe_mw_cleanup(struct rxe_pool_elem *elem) +{ + struct rxe_mw *mw = container_of(elem, typeof(*mw), elem); + struct rxe_pd *pd = to_rpd(mw->ibmw.pd); + + rxe_put(pd); + + if (mw->mr) { + struct rxe_mr *mr = mw->mr; + + mw->mr = NULL; + atomic_dec(&mr->num_mw); + rxe_put(mr); + } + + if (mw->qp) { + struct rxe_qp *qp = mw->qp; + + mw->qp = NULL; + rxe_put(qp); + } + + mw->access = 0; + mw->addr = 0; + mw->length = 0; + mw->state = RXE_MW_STATE_INVALID; +} diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index df596ba7527d..d4ba4d506f17 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -29,7 +29,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_WR_SEND] = { .name = "IB_WR_SEND", .mask = { - [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, @@ -39,7 +38,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_WR_SEND_WITH_IMM] = { .name = "IB_WR_SEND_WITH_IMM", .mask = { - [IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK, [IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK, diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 918270e34a35..568a7cbd13d4 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -50,9 +50,7 @@ enum rxe_device_param { | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS - | IB_DEVICE_ALLOW_USER_UNREG | IB_DEVICE_MEM_WINDOW - | IB_DEVICE_MEM_WINDOW_TYPE_2A | IB_DEVICE_MEM_WINDOW_TYPE_2B, RXE_MAX_SGE = 32, RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) + diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 87066d04ed18..19b14826385b 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -13,7 +13,6 @@ static const struct rxe_type_info { size_t size; size_t elem_offset; void (*cleanup)(struct rxe_pool_elem *elem); - enum rxe_pool_flags flags; u32 min_index; u32 max_index; u32 max_elem; @@ -46,6 +45,7 @@ static const struct rxe_type_info { .name = "srq", .size = sizeof(struct rxe_srq), .elem_offset = offsetof(struct rxe_srq, elem), + .cleanup = rxe_srq_cleanup, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1, @@ -73,7 +73,6 @@ static const struct rxe_type_info { .size = sizeof(struct rxe_mr), .elem_offset = offsetof(struct rxe_mr, elem), .cleanup = rxe_mr_cleanup, - .flags = RXE_POOL_ALLOC, .min_index = RXE_MIN_MR_INDEX, .max_index = RXE_MAX_MR_INDEX, .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1, @@ -82,6 +81,7 @@ static const struct rxe_type_info { .name = "mw", .size = sizeof(struct rxe_mw), .elem_offset = offsetof(struct rxe_mw, elem), + .cleanup = rxe_mw_cleanup, .min_index = RXE_MIN_MW_INDEX, .max_index = RXE_MAX_MW_INDEX, .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1, @@ -101,7 +101,6 @@ void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, pool->max_elem = info->max_elem; pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN); pool->elem_offset = info->elem_offset; - pool->flags = info->flags; pool->cleanup = info->cleanup; atomic_set(&pool->num_elem, 0); @@ -122,7 +121,7 @@ void *rxe_alloc(struct rxe_pool *pool) void *obj; int err; - if (WARN_ON(!(pool->flags & RXE_POOL_ALLOC))) + if (WARN_ON(!(pool->type == RXE_TYPE_MR))) return NULL; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) @@ -156,7 +155,7 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem) { int err; - if (WARN_ON(pool->flags & RXE_POOL_ALLOC)) + if (WARN_ON(pool->type == RXE_TYPE_MR)) return -EINVAL; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) @@ -206,7 +205,7 @@ static void rxe_elem_release(struct kref *kref) if (pool->cleanup) pool->cleanup(elem); - if (pool->flags & RXE_POOL_ALLOC) + if (pool->type == RXE_TYPE_MR) kfree(elem->obj); atomic_dec(&pool->num_elem); diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 24bcc786c1b3..0860660d65ec 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -7,10 +7,6 @@ #ifndef RXE_POOL_H #define RXE_POOL_H -enum rxe_pool_flags { - RXE_POOL_ALLOC = BIT(1), -}; - enum rxe_elem_type { RXE_TYPE_UC, RXE_TYPE_PD, @@ -35,7 +31,6 @@ struct rxe_pool { struct rxe_dev *rxe; const char *name; void (*cleanup)(struct rxe_pool_elem *elem); - enum rxe_pool_flags flags; enum rxe_elem_type type; unsigned int max_elem; diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 62acf890af6c..22e9b85344c3 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -63,7 +63,6 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) int port_num = init->port_num; switch (init->qp_type) { - case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_RC: case IB_QPT_UC: @@ -81,7 +80,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) if (rxe_qp_chk_cap(rxe, cap, !!init->srq)) goto err1; - if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) { + if (init->qp_type == IB_QPT_GSI) { if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { pr_warn("invalid port = %d\n", port_num); goto err1; @@ -89,11 +88,6 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) port = &rxe->port; - if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) { - pr_warn("SMI QP exists for port %d\n", port_num); - goto err1; - } - if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { pr_warn("GSI QP exists for port %d\n", port_num); goto err1; @@ -167,12 +161,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, port = &rxe->port; switch (init->qp_type) { - case IB_QPT_SMI: - qp->ibqp.qp_num = 0; - port->qp_smi_index = qpn; - qp->attr.port_num = init->port_num; - break; - case IB_QPT_GSI: qp->ibqp.qp_num = 1; port->qp_gsi_index = qpn; @@ -334,6 +322,9 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, qp->scq = scq; qp->srq = srq; + atomic_inc(&rcq->num_wq); + atomic_inc(&scq->num_wq); + rxe_qp_init_misc(rxe, qp, init); err = rxe_qp_init_req(rxe, qp, init, udata, uresp); @@ -353,6 +344,9 @@ err2: rxe_queue_cleanup(qp->sq.queue); qp->sq.queue = NULL; err1: + atomic_dec(&rcq->num_wq); + atomic_dec(&scq->num_wq); + qp->pd = NULL; qp->rcq = NULL; qp->scq = NULL; @@ -777,9 +771,11 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp) return 0; } -/* called by the destroy qp verb */ -void rxe_qp_destroy(struct rxe_qp *qp) +/* called when the last reference to the qp is dropped */ +static void rxe_qp_do_cleanup(struct work_struct *work) { + struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); + qp->valid = 0; qp->qp_timeout_jiffies = 0; rxe_cleanup_task(&qp->resp.task); @@ -798,12 +794,6 @@ void rxe_qp_destroy(struct rxe_qp *qp) __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); } -} - -/* called when the last reference to the qp is dropped */ -static void rxe_qp_do_cleanup(struct work_struct *work) -{ - struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); @@ -814,10 +804,14 @@ static void rxe_qp_do_cleanup(struct work_struct *work) if (qp->rq.queue) rxe_queue_cleanup(qp->rq.queue); + atomic_dec(&qp->scq->num_wq); if (qp->scq) rxe_put(qp->scq); + + atomic_dec(&qp->rcq->num_wq); if (qp->rcq) rxe_put(qp->rcq); + if (qp->pd) rxe_put(qp->pd); diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index d09a8b68c962..f3ad7b6dbd97 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -34,7 +34,6 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, } break; case IB_QPT_UD: - case IB_QPT_SMI: case IB_QPT_GSI: if (unlikely(pkt_type != IB_OPCODE_UD)) { pr_warn_ratelimited("bad qp type\n"); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index ae5fbc79dd5c..9d98237389cf 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -33,8 +33,6 @@ static inline void retry_first_write_send(struct rxe_qp *qp, } else { advance_dma_data(&wqe->dma, to_send); } - if (mask & WR_WRITE_MASK) - wqe->iova += qp->mtu; } } @@ -308,7 +306,6 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, case IB_QPT_UC: return next_opcode_uc(qp, opcode, fits); - case IB_QPT_SMI: case IB_QPT_UD: case IB_QPT_GSI: switch (opcode) { @@ -414,8 +411,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, if (pkt->mask & RXE_ATMETH_MASK) { atmeth_set_va(pkt, wqe->iova); - if (opcode == IB_OPCODE_RC_COMPARE_SWAP || - opcode == IB_OPCODE_RD_COMPARE_SWAP) { + if (opcode == IB_OPCODE_RC_COMPARE_SWAP) { atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap); atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add); } else { @@ -437,7 +433,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, - struct sk_buff *skb, u32 paylen) + struct sk_buff *skb, u32 payload) { int err; @@ -449,19 +445,19 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, if (wqe->wr.send_flags & IB_SEND_INLINE) { u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset]; - memcpy(payload_addr(pkt), tmp, paylen); + memcpy(payload_addr(pkt), tmp, payload); - wqe->dma.resid -= paylen; - wqe->dma.sge_offset += paylen; + wqe->dma.resid -= payload; + wqe->dma.sge_offset += payload; } else { err = copy_data(qp->pd, 0, &wqe->dma, - payload_addr(pkt), paylen, + payload_addr(pkt), payload, RXE_FROM_MR_OBJ); if (err) return err; } if (bth_pad(pkt)) { - u8 *pad = payload_addr(pkt) + paylen; + u8 *pad = payload_addr(pkt) + payload; memset(pad, 0, bth_pad(pkt)); } @@ -527,8 +523,7 @@ static void rollback_state(struct rxe_send_wqe *wqe, qp->req.psn = rollback_psn; } -static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt) +static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { qp->req.opcode = pkt->opcode; @@ -611,7 +606,8 @@ int rxe_requester(void *arg) struct rxe_ah *ah; struct rxe_av *av; - rxe_get(qp); + if (!rxe_get(qp)) + return -EAGAIN; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -661,7 +657,7 @@ next_wqe: opcode = next_opcode(qp, wqe, wqe->wr.opcode); if (unlikely(opcode < 0)) { wqe->status = IB_WC_LOC_QP_OP_ERR; - goto exit; + goto err; } mask = rxe_opcode[opcode].mask; @@ -755,7 +751,7 @@ next_wqe: goto err; } - update_state(qp, wqe, &pkt); + update_state(qp, &pkt); goto next_wqe; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 9cd0eaff98de..f4f6ee5d81fe 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -277,7 +277,6 @@ static enum resp_states check_op_valid(struct rxe_qp *qp, break; case IB_QPT_UD: - case IB_QPT_SMI: case IB_QPT_GSI: break; @@ -577,8 +576,7 @@ static enum resp_states process_atomic(struct rxe_qp *qp, qp->resp.atomic_orig = *vaddr; - if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP || - pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) { + if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) { if (*vaddr == atmeth_comp(pkt)) *vaddr = atmeth_swap_add(pkt); } else { @@ -834,7 +832,6 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) if (pkt->mask & RXE_SEND_MASK) { if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { if (skb->protocol == htons(ETH_P_IP)) { memset(&hdr.reserved, 0, @@ -1265,7 +1262,8 @@ int rxe_responder(void *arg) struct rxe_pkt_info *pkt = NULL; int ret = 0; - rxe_get(qp); + if (!rxe_get(qp)) + return -EAGAIN; qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 0c0721f04357..02b39498c370 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -6,64 +6,34 @@ #include <linux/vmalloc.h> #include "rxe.h" -#include "rxe_loc.h" #include "rxe_queue.h" -int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, - struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) +int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) { - if (srq && srq->error) { - pr_warn("srq in error state\n"); + struct ib_srq_attr *attr = &init->attr; + + if (attr->max_wr > rxe->attr.max_srq_wr) { + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + attr->max_wr, rxe->attr.max_srq_wr); goto err1; } - if (mask & IB_SRQ_MAX_WR) { - if (attr->max_wr > rxe->attr.max_srq_wr) { - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", - attr->max_wr, rxe->attr.max_srq_wr); - goto err1; - } - - if (attr->max_wr <= 0) { - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); - goto err1; - } - - if (srq && srq->limit && (attr->max_wr < srq->limit)) { - pr_warn("max_wr (%d) < srq->limit (%d)\n", - attr->max_wr, srq->limit); - goto err1; - } - - if (attr->max_wr < RXE_MIN_SRQ_WR) - attr->max_wr = RXE_MIN_SRQ_WR; + if (attr->max_wr <= 0) { + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + goto err1; } - if (mask & IB_SRQ_LIMIT) { - if (attr->srq_limit > rxe->attr.max_srq_wr) { - pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", - attr->srq_limit, rxe->attr.max_srq_wr); - goto err1; - } + if (attr->max_wr < RXE_MIN_SRQ_WR) + attr->max_wr = RXE_MIN_SRQ_WR; - if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) { - pr_warn("srq_limit (%d) > cur limit(%d)\n", - attr->srq_limit, - srq->rq.queue->buf->index_mask); - goto err1; - } + if (attr->max_sge > rxe->attr.max_srq_sge) { + pr_warn("max_sge(%d) > max_srq_sge(%d)\n", + attr->max_sge, rxe->attr.max_srq_sge); + goto err1; } - if (mask == IB_SRQ_INIT_MASK) { - if (attr->max_sge > rxe->attr.max_srq_sge) { - pr_warn("max_sge(%d) > max_srq_sge(%d)\n", - attr->max_sge, rxe->attr.max_srq_sge); - goto err1; - } - - if (attr->max_sge < RXE_MIN_SRQ_SGE) - attr->max_sge = RXE_MIN_SRQ_SGE; - } + if (attr->max_sge < RXE_MIN_SRQ_SGE) + attr->max_sge = RXE_MIN_SRQ_SGE; return 0; @@ -93,8 +63,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, spin_lock_init(&srq->rq.consumer_lock); type = QUEUE_TYPE_FROM_CLIENT; - q = rxe_queue_init(rxe, &srq->rq.max_wr, - srq_wqe_size, type); + q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); if (!q) { pr_warn("unable to allocate queue for srq\n"); return -ENOMEM; @@ -121,6 +90,57 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, return 0; } +int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, + struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) +{ + if (srq->error) { + pr_warn("srq in error state\n"); + goto err1; + } + + if (mask & IB_SRQ_MAX_WR) { + if (attr->max_wr > rxe->attr.max_srq_wr) { + pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + attr->max_wr, rxe->attr.max_srq_wr); + goto err1; + } + + if (attr->max_wr <= 0) { + pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + goto err1; + } + + if (srq->limit && (attr->max_wr < srq->limit)) { + pr_warn("max_wr (%d) < srq->limit (%d)\n", + attr->max_wr, srq->limit); + goto err1; + } + + if (attr->max_wr < RXE_MIN_SRQ_WR) + attr->max_wr = RXE_MIN_SRQ_WR; + } + + if (mask & IB_SRQ_LIMIT) { + if (attr->srq_limit > rxe->attr.max_srq_wr) { + pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", + attr->srq_limit, rxe->attr.max_srq_wr); + goto err1; + } + + if (attr->srq_limit > srq->rq.queue->buf->index_mask) { + pr_warn("srq_limit (%d) > cur limit(%d)\n", + attr->srq_limit, + srq->rq.queue->buf->index_mask); + goto err1; + } + } + + return 0; + +err1: + return -EINVAL; +} + int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata) @@ -154,3 +174,14 @@ err2: srq->rq.queue = NULL; return err; } + +void rxe_srq_cleanup(struct rxe_pool_elem *elem) +{ + struct rxe_srq *srq = container_of(elem, typeof(*srq), elem); + + if (srq->pd) + rxe_put(srq->pd); + + if (srq->rq.queue) + rxe_queue_cleanup(srq->rq.queue); +} diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 67184b0281a0..9d995854a174 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -7,8 +7,8 @@ #include <linux/dma-mapping.h> #include <net/addrconf.h> #include <rdma/uverbs_ioctl.h> + #include "rxe.h" -#include "rxe_loc.h" #include "rxe_queue.h" #include "rxe_hw_counters.h" @@ -286,36 +286,34 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; - if (init->srq_type != IB_SRQT_BASIC) - return -EOPNOTSUPP; - if (udata) { if (udata->outlen < sizeof(*uresp)) return -EINVAL; uresp = udata->outbuf; } - err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); + if (init->srq_type != IB_SRQT_BASIC) + return -EOPNOTSUPP; + + err = rxe_srq_chk_init(rxe, init); if (err) - goto err1; + return err; err = rxe_add_to_pool(&rxe->srq_pool, srq); if (err) - goto err1; + return err; rxe_get(pd); srq->pd = pd; err = rxe_srq_from_init(rxe, srq, init, udata, uresp); if (err) - goto err2; + goto err_put; return 0; -err2: - rxe_put(pd); +err_put: rxe_put(srq); -err1: return err; } @@ -339,16 +337,12 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, err = rxe_srq_chk_attr(rxe, srq, attr, mask); if (err) - goto err1; + return err; err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); if (err) - goto err1; - + return err; return 0; - -err1: - return err; } static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) @@ -368,10 +362,6 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rxe_srq *srq = to_rsrq(ibsrq); - if (srq->rq.queue) - rxe_queue_cleanup(srq->rq.queue); - - rxe_put(srq->pd); rxe_put(srq); return 0; } @@ -495,7 +485,6 @@ static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (ret) return ret; - rxe_qp_destroy(qp); rxe_put(qp); return 0; } @@ -536,7 +525,6 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, wr->send_flags = ibwr->send_flags; if (qp_type(qp) == IB_QPT_UD || - qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { struct ib_ah *ibah = ud_wr(ibwr)->ah; @@ -807,6 +795,12 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rxe_cq *cq = to_rcq(ibcq); + /* See IBA C11-17: The CI shall return an error if this Verb is + * invoked while a Work Queue is still associated with the CQ. + */ + if (atomic_read(&cq->num_wq)) + return -EINVAL; + rxe_cq_disable(cq); rxe_put(cq); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index e7eff1ca75e9..ac464e68c923 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -67,6 +67,7 @@ struct rxe_cq { bool is_dying; bool is_user; struct tasklet_struct comp_task; + atomic_t num_wq; }; enum wqe_state { @@ -373,7 +374,6 @@ struct rxe_port { spinlock_t port_lock; /* guard port */ unsigned int mtu_cap; /* special QPs */ - u32 qp_smi_index; u32 qp_gsi_index; }; @@ -394,7 +394,6 @@ struct rxe_dev { struct rxe_pool cq_pool; struct rxe_pool mr_pool; struct rxe_pool mw_pool; - struct rxe_pool mc_grp_pool; /* multicast support */ spinlock_t mcg_lock; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index e5c586913d0b..dacc174604bf 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -119,6 +119,7 @@ static int siw_dev_qualified(struct net_device *netdev) * <linux/if_arp.h> for type identifiers. */ if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 || + netdev->type == ARPHRD_NONE || (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) return 1; @@ -315,12 +316,12 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->netdev = netdev; - if (netdev->type != ARPHRD_LOOPBACK) { + if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) { addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, netdev->dev_addr); } else { /* - * The loopback device does not have a HW address, + * This device does not have a HW address, * but connection mangagement lib expects gid != 0 */ size_t len = min_t(size_t, strlen(base_dev->name), 6); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 54ef367b074a..09316072b789 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -132,8 +132,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, /* Revisit atomic caps if RFC 7306 gets supported */ attr->atomic_cap = 0; - attr->device_cap_flags = - IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG; + attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG; attr->max_cq = sdev->attrs.max_cq; attr->max_cqe = sdev->attrs.max_cqe; attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 44d8d151ff90..35e9c8a330e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -411,6 +411,7 @@ struct ipoib_dev_priv { struct dentry *path_dentry; #endif u64 hca_caps; + u64 kernel_caps; struct ipoib_ethtool_st ethtool; unsigned int max_send_sge; const struct net_device_ops *rn_ops; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 9934b8bd7f56..2a8961b685c2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1850,11 +1850,12 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev) static void ipoib_set_dev_features(struct ipoib_dev_priv *priv) { priv->hca_caps = priv->ca->attrs.device_cap_flags; + priv->kernel_caps = priv->ca->attrs.kernel_cap_flags; if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; - if (priv->hca_caps & IB_DEVICE_UD_TSO) + if (priv->kernel_caps & IBK_UD_TSO) priv->dev->hw_features |= NETIF_F_TSO; priv->dev->features |= priv->dev->hw_features; @@ -2201,7 +2202,7 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name, priv->rn_ops = dev->netdev_ops; - if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION) + if (hca->attrs.kernel_cap_flags & IBK_VIRTUAL_FUNCTION) dev->netdev_ops = &ipoib_netdev_ops_vf; else dev->netdev_ops = &ipoib_netdev_ops_pf; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 5a150a080ac2..368e5d77416d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -197,16 +197,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) init_attr.send_cq = priv->send_cq; init_attr.recv_cq = priv->recv_cq; - if (priv->hca_caps & IB_DEVICE_UD_TSO) + if (priv->kernel_caps & IBK_UD_TSO) init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; - if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) + if (priv->kernel_caps & IBK_BLOCK_MULTICAST_LOOPBACK) init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; - if (priv->hca_caps & IB_DEVICE_RDMA_NETDEV_OPA) + if (priv->kernel_caps & IBK_RDMA_NETDEV_OPA) init_attr.create_flags |= IB_QP_CREATE_NETDEV_USE; priv->qp = ib_create_qp(priv->pd, &init_attr); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index f8d0bab4424c..321949a570ed 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -650,7 +650,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, SHOST_DIX_GUARD_CRC); } - if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + if (!(ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) shost->virt_boundary_mask = SZ_4K - 1; if (iscsi_host_add(shost, ib_dev->dev.parent)) { diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 7e4faf9c5e9e..dee8c97ff056 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -363,7 +363,7 @@ struct iser_fr_pool { * @cq: Connection completion queue * @cq_size: The number of max outstanding completions * @device: reference to iser device - * @fr_pool: connection fast registration poool + * @fr_pool: connection fast registration pool * @pi_support: Indicate device T10-PI support * @reg_cqe: completion handler */ diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 5dbad68c7390..c08f2d9133b6 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -115,7 +115,7 @@ iser_create_fastreg_desc(struct iser_device *device, if (!desc) return ERR_PTR(-ENOMEM); - if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + if (ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) mr_type = IB_MR_TYPE_SG_GAPS; else mr_type = IB_MR_TYPE_MEM_REG; @@ -517,7 +517,7 @@ static void iser_calc_scsi_params(struct iser_conn *iser_conn, * (head and tail) for a single page worth data, so one additional * entry is required. */ - if (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG) + if (attr->kernel_cap_flags & IBK_SG_GAPS_REG) reserved_mr_pages = 0; else reserved_mr_pages = 1; @@ -562,8 +562,8 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) /* connection T10-PI support */ if (iser_pi_enable) { - if (!(device->ib_device->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER)) { + if (!(device->ib_device->attrs.kernel_cap_flags & + IBK_INTEGRITY_HANDOVER)) { iser_warn("T10-PI requested but not supported on %s, " "continue without T10-PI\n", dev_name(&ib_conn->device->ib_device->dev)); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 48064bd8aa2c..b360a1527cd1 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -42,6 +42,7 @@ MODULE_PARM_DESC(sg_tablesize, static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); +static struct workqueue_struct *isert_login_wq; static struct workqueue_struct *isert_comp_wq; static struct workqueue_struct *isert_release_wq; @@ -230,7 +231,7 @@ isert_create_device_ib_res(struct isert_device *device) } /* Check signature cap */ - if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER) + if (ib_dev->attrs.kernel_cap_flags & IBK_INTEGRITY_HANDOVER) device->pi_capable = true; else device->pi_capable = false; @@ -1017,7 +1018,7 @@ isert_rx_login_req(struct isert_conn *isert_conn) complete(&isert_conn->login_comp); return; } - schedule_delayed_work(&conn->login_work, 0); + queue_delayed_work(isert_login_wq, &conn->login_work, 0); } static struct iscsit_cmd @@ -2348,9 +2349,9 @@ isert_get_login_rx(struct iscsit_conn *conn, struct iscsi_login *login) /* * For login requests after the first PDU, isert_rx_login_req() will - * kick schedule_delayed_work(&conn->login_work) as the packet is - * received, which turns this callback from iscsi_target_do_login_rx() - * into a NOP. + * kick queue_delayed_work(isert_login_wq, &conn->login_work) as + * the packet is received, which turns this callback from + * iscsi_target_do_login_rx() into a NOP. */ if (!login->first_request) return 0; @@ -2606,20 +2607,23 @@ static struct iscsit_transport iser_target_transport = { static int __init isert_init(void) { - int ret; + isert_login_wq = alloc_workqueue("isert_login_wq", 0, 0); + if (!isert_login_wq) { + isert_err("Unable to allocate isert_login_wq\n"); + return -ENOMEM; + } isert_comp_wq = alloc_workqueue("isert_comp_wq", WQ_UNBOUND | WQ_HIGHPRI, 0); if (!isert_comp_wq) { isert_err("Unable to allocate isert_comp_wq\n"); - return -ENOMEM; + goto destroy_login_wq; } isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); if (!isert_release_wq) { isert_err("Unable to allocate isert_release_wq\n"); - ret = -ENOMEM; goto destroy_comp_wq; } @@ -2630,17 +2634,20 @@ static int __init isert_init(void) destroy_comp_wq: destroy_workqueue(isert_comp_wq); +destroy_login_wq: + destroy_workqueue(isert_login_wq); - return ret; + return -ENOMEM; } static void __exit isert_exit(void) { - flush_scheduled_work(); + flush_workqueue(isert_login_wq); destroy_workqueue(isert_release_wq); destroy_workqueue(isert_comp_wq); iscsit_unregister_transport(&iser_target_transport); isert_info("iSER_TARGET[0] - Released iser_target_transport\n"); + destroy_workqueue(isert_login_wq); } MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure"); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index c2c860d0c56e..9809c3883979 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2785,7 +2785,7 @@ static void free_clt(struct rtrs_clt_sess *clt) /** * rtrs_clt_open() - Open a path to an RTRS server * @ops: holds the link event callback and the private pointer. - * @sessname: name of the session + * @pathname: name of the path to an RTRS server * @paths: Paths to be established defined by their src and dst addresses * @paths_num: Number of elements in the @paths array * @port: port to be used by the RTRS session diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 285b766e4e70..6058abf42ba7 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -430,7 +430,7 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, spin_lock_init(&pool->lock); INIT_LIST_HEAD(&pool->free_list); - if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + if (device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) mr_type = IB_MR_TYPE_SG_GAPS; else mr_type = IB_MR_TYPE_MEM_REG; @@ -3650,7 +3650,7 @@ static ssize_t add_target_store(struct device *dev, target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target_host->max_segment_size = ib_dma_max_seg_size(ibdev); - if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) + if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; target = host_to_target(target_host); @@ -3706,8 +3706,8 @@ static ssize_t add_target_store(struct device *dev, } if (srp_dev->use_fast_reg) { - bool gaps_reg = (ibdev->attrs.device_cap_flags & - IB_DEVICE_SG_GAPS_REG); + bool gaps_reg = ibdev->attrs.kernel_cap_flags & + IBK_SG_GAPS_REG; max_sectors_per_mr = srp_dev->max_pages_per_mr << (ilog2(srp_dev->mr_page_size) - 9); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b87c8ae41d9b..f2a5e1ea508a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -867,8 +867,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev); /* T10-PI support */ - if (ctrl->device->dev->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER) + if (ctrl->device->dev->attrs.kernel_cap_flags & + IBK_INTEGRITY_HANDOVER) pi_capable = true; ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev, diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 2fab0b219b25..09fdcac87d17 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1221,8 +1221,8 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; - if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER)) { + if (nport->pi_enable && !(cm_id->device->attrs.kernel_cap_flags & + IBK_INTEGRITY_HANDOVER)) { pr_warn("T10-PI is not supported by device %s. Disabling it\n", cm_id->device->name); nport->pi_enable = false; diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 31ef64eb7fbb..b3a1265711cc 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -649,7 +649,7 @@ static int smbd_ia_open( smbd_max_frmr_depth, info->id->device->attrs.max_fast_reg_page_list_len); info->mr_type = IB_MR_TYPE_MEM_REG; - if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) info->mr_type = IB_MR_TYPE_SG_GAPS; info->pd = ib_alloc_pd(info->id->device, 0); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 11ee4eaf84bd..9c6317cf80d5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -220,32 +220,24 @@ enum rdma_link_layer { }; enum ib_device_cap_flags { - IB_DEVICE_RESIZE_MAX_WR = (1 << 0), - IB_DEVICE_BAD_PKEY_CNTR = (1 << 1), - IB_DEVICE_BAD_QKEY_CNTR = (1 << 2), - IB_DEVICE_RAW_MULTI = (1 << 3), - IB_DEVICE_AUTO_PATH_MIG = (1 << 4), - IB_DEVICE_CHANGE_PHY_PORT = (1 << 5), - IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6), - IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7), - IB_DEVICE_SHUTDOWN_PORT = (1 << 8), - /* Not in use, former INIT_TYPE = (1 << 9),*/ - IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10), - IB_DEVICE_SYS_IMAGE_GUID = (1 << 11), - IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12), - IB_DEVICE_SRQ_RESIZE = (1 << 13), - IB_DEVICE_N_NOTIFY_CQ = (1 << 14), - - /* - * This device supports a per-device lkey or stag that can be - * used without performing a memory registration for the local - * memory. Note that ULPs should never check this flag, but - * instead of use the local_dma_lkey flag in the ib_pd structure, - * which will always contain a usable lkey. - */ - IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15), - /* Reserved, old SEND_W_INV = (1 << 16),*/ - IB_DEVICE_MEM_WINDOW = (1 << 17), + IB_DEVICE_RESIZE_MAX_WR = IB_UVERBS_DEVICE_RESIZE_MAX_WR, + IB_DEVICE_BAD_PKEY_CNTR = IB_UVERBS_DEVICE_BAD_PKEY_CNTR, + IB_DEVICE_BAD_QKEY_CNTR = IB_UVERBS_DEVICE_BAD_QKEY_CNTR, + IB_DEVICE_RAW_MULTI = IB_UVERBS_DEVICE_RAW_MULTI, + IB_DEVICE_AUTO_PATH_MIG = IB_UVERBS_DEVICE_AUTO_PATH_MIG, + IB_DEVICE_CHANGE_PHY_PORT = IB_UVERBS_DEVICE_CHANGE_PHY_PORT, + IB_DEVICE_UD_AV_PORT_ENFORCE = IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE, + IB_DEVICE_CURR_QP_STATE_MOD = IB_UVERBS_DEVICE_CURR_QP_STATE_MOD, + IB_DEVICE_SHUTDOWN_PORT = IB_UVERBS_DEVICE_SHUTDOWN_PORT, + /* IB_DEVICE_INIT_TYPE = IB_UVERBS_DEVICE_INIT_TYPE, (not in use) */ + IB_DEVICE_PORT_ACTIVE_EVENT = IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT, + IB_DEVICE_SYS_IMAGE_GUID = IB_UVERBS_DEVICE_SYS_IMAGE_GUID, + IB_DEVICE_RC_RNR_NAK_GEN = IB_UVERBS_DEVICE_RC_RNR_NAK_GEN, + IB_DEVICE_SRQ_RESIZE = IB_UVERBS_DEVICE_SRQ_RESIZE, + IB_DEVICE_N_NOTIFY_CQ = IB_UVERBS_DEVICE_N_NOTIFY_CQ, + + /* Reserved, old SEND_W_INV = 1 << 16,*/ + IB_DEVICE_MEM_WINDOW = IB_UVERBS_DEVICE_MEM_WINDOW, /* * Devices should set IB_DEVICE_UD_IP_SUM if they support * insertion of UDP and TCP checksum on outgoing UD IPoIB @@ -253,9 +245,8 @@ enum ib_device_cap_flags { * incoming messages. Setting this flag implies that the * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. */ - IB_DEVICE_UD_IP_CSUM = (1 << 18), - IB_DEVICE_UD_TSO = (1 << 19), - IB_DEVICE_XRC = (1 << 20), + IB_DEVICE_UD_IP_CSUM = IB_UVERBS_DEVICE_UD_IP_CSUM, + IB_DEVICE_XRC = IB_UVERBS_DEVICE_XRC, /* * This device supports the IB "base memory management extension", @@ -266,31 +257,53 @@ enum ib_device_cap_flags { * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the * stag. */ - IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21), - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22), - IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23), - IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24), - IB_DEVICE_RC_IP_CSUM = (1 << 25), + IB_DEVICE_MEM_MGT_EXTENSIONS = IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS, + IB_DEVICE_MEM_WINDOW_TYPE_2A = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A, + IB_DEVICE_MEM_WINDOW_TYPE_2B = IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B, + IB_DEVICE_RC_IP_CSUM = IB_UVERBS_DEVICE_RC_IP_CSUM, /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */ - IB_DEVICE_RAW_IP_CSUM = (1 << 26), - /* - * Devices should set IB_DEVICE_CROSS_CHANNEL if they - * support execution of WQEs that involve synchronization - * of I/O operations with single completion queue managed - * by hardware. - */ - IB_DEVICE_CROSS_CHANNEL = (1 << 27), - IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), - IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30), - IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), - IB_DEVICE_SG_GAPS_REG = (1ULL << 32), - IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), + IB_DEVICE_RAW_IP_CSUM = IB_UVERBS_DEVICE_RAW_IP_CSUM, + IB_DEVICE_MANAGED_FLOW_STEERING = + IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING, /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */ - IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), - IB_DEVICE_RDMA_NETDEV_OPA = (1ULL << 35), + IB_DEVICE_RAW_SCATTER_FCS = IB_UVERBS_DEVICE_RAW_SCATTER_FCS, /* The device supports padding incoming writes to cacheline. */ - IB_DEVICE_PCI_WRITE_END_PADDING = (1ULL << 36), - IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), + IB_DEVICE_PCI_WRITE_END_PADDING = + IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING, +}; + +enum ib_kernel_cap_flags { + /* + * This device supports a per-device lkey or stag that can be + * used without performing a memory registration for the local + * memory. Note that ULPs should never check this flag, but + * instead of use the local_dma_lkey flag in the ib_pd structure, + * which will always contain a usable lkey. + */ + IBK_LOCAL_DMA_LKEY = 1 << 0, + /* IB_QP_CREATE_INTEGRITY_EN is supported to implement T10-PI */ + IBK_INTEGRITY_HANDOVER = 1 << 1, + /* IB_ACCESS_ON_DEMAND is supported during reg_user_mr() */ + IBK_ON_DEMAND_PAGING = 1 << 2, + /* IB_MR_TYPE_SG_GAPS is supported */ + IBK_SG_GAPS_REG = 1 << 3, + /* Driver supports RDMA_NLDEV_CMD_DELLINK */ + IBK_ALLOW_USER_UNREG = 1 << 4, + + /* ipoib will use IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK */ + IBK_BLOCK_MULTICAST_LOOPBACK = 1 << 5, + /* iopib will use IB_QP_CREATE_IPOIB_UD_LSO for its QPs */ + IBK_UD_TSO = 1 << 6, + /* iopib will use the device ops: + * get_vf_config + * get_vf_guid + * get_vf_stats + * set_vf_guid + * set_vf_link_state + */ + IBK_VIRTUAL_FUNCTION = 1 << 7, + /* ipoib will use IB_QP_CREATE_NETDEV_USE for its QPs */ + IBK_RDMA_NETDEV_OPA = 1 << 8, }; enum ib_atomic_cap { @@ -389,6 +402,7 @@ struct ib_device_attr { int max_qp; int max_qp_wr; u64 device_cap_flags; + u64 kernel_cap_flags; int max_send_sge; int max_recv_sge; int max_sge_rd; @@ -564,7 +578,7 @@ struct rdma_stat_desc { /** * struct rdma_hw_stats * @lock - Mutex to protect parallel write access to lifespan and values - * of counters, which are 64bits and not guaranteeed to be written + * of counters, which are 64bits and not guaranteed to be written * atomicaly on 32bits systems. * @timestamp - Used by the core code to track when the last update was * @lifespan - Used by the core code to determine how old the counters @@ -1621,19 +1635,23 @@ struct ib_srq { }; enum ib_raw_packet_caps { - /* Strip cvlan from incoming packet and report it in the matching work + /* + * Strip cvlan from incoming packet and report it in the matching work * completion is supported. */ - IB_RAW_PACKET_CAP_CVLAN_STRIPPING = (1 << 0), - /* Scatter FCS field of an incoming packet to host memory is supported. + IB_RAW_PACKET_CAP_CVLAN_STRIPPING = + IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING, + /* + * Scatter FCS field of an incoming packet to host memory is supported. */ - IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1), + IB_RAW_PACKET_CAP_SCATTER_FCS = IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS, /* Checksum offloads are supported (for both send and receive). */ - IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2), - /* When a packet is received for an RQ with no receive WQEs, the + IB_RAW_PACKET_CAP_IP_CSUM = IB_UVERBS_RAW_PACKET_CAP_IP_CSUM, + /* + * When a packet is received for an RQ with no receive WQEs, the * packet processing is delayed. */ - IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3), + IB_RAW_PACKET_CAP_DELAY_DROP = IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP, }; enum ib_wq_type { @@ -4304,7 +4322,7 @@ static inline int ib_check_mr_access(struct ib_device *ib_dev, return -EINVAL; if (flags & IB_ACCESS_ON_DEMAND && - !(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) + !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING)) return -EINVAL; return 0; } diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h index cbe3c2811455..f3d5377b217a 100644 --- a/include/rdma/opa_vnic.h +++ b/include/rdma/opa_vnic.h @@ -90,8 +90,7 @@ struct opa_vnic_stats { static inline bool rdma_cap_opa_vnic(struct ib_device *device) { - return !!(device->attrs.device_cap_flags & - IB_DEVICE_RDMA_NETDEV_OPA); + return !!(device->attrs.kernel_cap_flags & IBK_RDMA_NETDEV_OPA); } #endif /* _OPA_VNIC_H */ diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 7ee73a0652f1..7dd903d932e5 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -1298,4 +1298,46 @@ struct ib_uverbs_ex_modify_cq { #define IB_DEVICE_NAME_MAX 64 +/* + * bits 9, 15, 16, 19, 22, 27, 30, 31, 32, 33, 35 and 37 may be set by old + * kernels and should not be used. + */ +enum ib_uverbs_device_cap_flags { + IB_UVERBS_DEVICE_RESIZE_MAX_WR = 1 << 0, + IB_UVERBS_DEVICE_BAD_PKEY_CNTR = 1 << 1, + IB_UVERBS_DEVICE_BAD_QKEY_CNTR = 1 << 2, + IB_UVERBS_DEVICE_RAW_MULTI = 1 << 3, + IB_UVERBS_DEVICE_AUTO_PATH_MIG = 1 << 4, + IB_UVERBS_DEVICE_CHANGE_PHY_PORT = 1 << 5, + IB_UVERBS_DEVICE_UD_AV_PORT_ENFORCE = 1 << 6, + IB_UVERBS_DEVICE_CURR_QP_STATE_MOD = 1 << 7, + IB_UVERBS_DEVICE_SHUTDOWN_PORT = 1 << 8, + /* IB_UVERBS_DEVICE_INIT_TYPE = 1 << 9, (not in use) */ + IB_UVERBS_DEVICE_PORT_ACTIVE_EVENT = 1 << 10, + IB_UVERBS_DEVICE_SYS_IMAGE_GUID = 1 << 11, + IB_UVERBS_DEVICE_RC_RNR_NAK_GEN = 1 << 12, + IB_UVERBS_DEVICE_SRQ_RESIZE = 1 << 13, + IB_UVERBS_DEVICE_N_NOTIFY_CQ = 1 << 14, + IB_UVERBS_DEVICE_MEM_WINDOW = 1 << 17, + IB_UVERBS_DEVICE_UD_IP_CSUM = 1 << 18, + IB_UVERBS_DEVICE_XRC = 1 << 20, + IB_UVERBS_DEVICE_MEM_MGT_EXTENSIONS = 1 << 21, + IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2A = 1 << 23, + IB_UVERBS_DEVICE_MEM_WINDOW_TYPE_2B = 1 << 24, + IB_UVERBS_DEVICE_RC_IP_CSUM = 1 << 25, + /* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_IP_CSUM. */ + IB_UVERBS_DEVICE_RAW_IP_CSUM = 1 << 26, + IB_UVERBS_DEVICE_MANAGED_FLOW_STEERING = 1 << 29, + /* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS. */ + IB_UVERBS_DEVICE_RAW_SCATTER_FCS = 1ULL << 34, + IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING = 1ULL << 36, +}; + +enum ib_uverbs_raw_packet_caps { + IB_UVERBS_RAW_PACKET_CAP_CVLAN_STRIPPING = 1 << 0, + IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS = 1 << 1, + IB_UVERBS_RAW_PACKET_CAP_IP_CSUM = 1 << 2, + IB_UVERBS_RAW_PACKET_CAP_DELAY_DROP = 1 << 3, +}; + #endif /* IB_USER_VERBS_H */ diff --git a/net/rds/ib.c b/net/rds/ib.c index 24c9a9005a6f..9826fe7f9d00 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -154,8 +154,8 @@ static int rds_ib_add_one(struct ib_device *device) rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE); rds_ibdev->odp_capable = - !!(device->attrs.device_cap_flags & - IB_DEVICE_ON_DEMAND_PAGING) && + !!(device->attrs.kernel_cap_flags & + IBK_ON_DEMAND_PAGING) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & IB_ODP_SUPPORT_WRITE) && !!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps & diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 3fcd8e1b2550..de0bdb6b729f 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -195,7 +195,7 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device) ep->re_attr.cap.max_recv_sge = 1; ep->re_mrtype = IB_MR_TYPE_MEM_REG; - if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) + if (attrs->kernel_cap_flags & IBK_SG_GAPS_REG) ep->re_mrtype = IB_MR_TYPE_SG_GAPS; /* Quirk: Some devices advertise a large max_fast_reg_page_list_len |