summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx5
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-05-01 09:15:05 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-05-01 09:15:05 -0700
commitf34b2cf17825d69ae1e227871059ab18c2f57817 (patch)
tree501d035f600f88e56cb1dcfa299af279a4f7147d /drivers/infiniband/hw/mlx5
parent9f67672a817ec046f7554a885f0fe0d60e1bf99f (diff)
parent6da7bda36388ae00822f732c11febfe2ebbb5544 (diff)
downloadlinux-stable-f34b2cf17825d69ae1e227871059ab18c2f57817.tar.gz
linux-stable-f34b2cf17825d69ae1e227871059ab18c2f57817.tar.bz2
linux-stable-f34b2cf17825d69ae1e227871059ab18c2f57817.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "This is significantly bug fixes and general cleanups. The noteworthy new features are fairly small: - XRC support for HNS and improves RQ operations - Bug fixes and updates for hns, mlx5, bnxt_re, hfi1, i40iw, rxe, siw and qib - Quite a few general cleanups on spelling, error handling, static checker detections, etc - Increase the number of device ports supported beyond 255. High port count software switches now exist - Several bug fixes for rtrs - mlx5 Device Memory support for host controlled atomics - Report SRQ tables through to rdma-tool" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (145 commits) IB/qib: Remove redundant assignment to ret RDMA/nldev: Add copy-on-fork attribute to get sys command RDMA/bnxt_re: Fix a double free in bnxt_qplib_alloc_res RDMA/siw: Fix a use after free in siw_alloc_mr IB/hfi1: Remove redundant variable rcd RDMA/nldev: Add QP numbers to SRQ information RDMA/nldev: Return SRQ information RDMA/restrack: Add support to get resource tracking for SRQ RDMA/nldev: Return context information RDMA/core: Add CM to restrack after successful attachment to a device RDMA/cma: Skip device which doesn't support CM RDMA/rxe: Fix a bug in rxe_fill_ip_info() RDMA/mlx5: Expose private query port RDMA/mlx4: Remove an unused variable RDMA/mlx5: Fix type assignment for ICM DM IB/mlx5: Set right RoCE l3 type and roce version while deleting GID RDMA/i40iw: Fix error unwinding when i40iw_hmc_sd_one fails RDMA/cxgb4: add missing qpid increment IB/ipoib: Remove unnecessary struct declaration RDMA/bnxt_re: Get rid of custom module reference counting ...
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c101
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h3
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c8
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c10
-rw-r--r--drivers/infiniband/hw/mlx5/counters.h2
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c64
-rw-r--r--drivers/infiniband/hw/mlx5/dm.c587
-rw-r--r--drivers/infiniband/hw/mlx5/dm.h68
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c9
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c4
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h4
-rw-r--r--drivers/infiniband/hw/mlx5/ib_virt.c16
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c16
-rw-r--r--drivers/infiniband/hw/mlx5/main.c343
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h182
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c163
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c185
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c17
-rw-r--r--drivers/infiniband/hw/mlx5/std_types.c173
20 files changed, 1198 insertions, 758 deletions
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index b4c009bb0db6..f43380106bd0 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -6,6 +6,7 @@ mlx5_ib-y := ah.o \
cong.o \
counters.o \
cq.o \
+ dm.o \
doorbell.o \
gsi.o \
ib_virt.o \
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 234f29912ba9..a8db8a051170 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -47,107 +47,6 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
return mlx5_cmd_exec_inout(dev, query_cong_params, in, out);
}
-int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
- u64 length, u32 alignment)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
- >> PAGE_SHIFT;
- u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
- u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
- u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
- u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
- u32 mlx5_alignment;
- u64 page_idx = 0;
- int ret = 0;
-
- if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
- return -EINVAL;
-
- /* mlx5 device sets alignment as 64*2^driver_value
- * so normalizing is needed.
- */
- mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
- alignment - MLX5_MEMIC_BASE_ALIGN;
- if (mlx5_alignment > max_alignment)
- return -EINVAL;
-
- MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
- MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
- MLX5_SET(alloc_memic_in, in, memic_size, length);
- MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
- mlx5_alignment);
-
- while (page_idx < num_memic_hw_pages) {
- spin_lock(&dm->lock);
- page_idx = bitmap_find_next_zero_area(dm->memic_alloc_pages,
- num_memic_hw_pages,
- page_idx,
- num_pages, 0);
-
- if (page_idx < num_memic_hw_pages)
- bitmap_set(dm->memic_alloc_pages,
- page_idx, num_pages);
-
- spin_unlock(&dm->lock);
-
- if (page_idx >= num_memic_hw_pages)
- break;
-
- MLX5_SET64(alloc_memic_in, in, range_start_addr,
- hw_start_addr + (page_idx * PAGE_SIZE));
-
- ret = mlx5_cmd_exec_inout(dev, alloc_memic, in, out);
- if (ret) {
- spin_lock(&dm->lock);
- bitmap_clear(dm->memic_alloc_pages,
- page_idx, num_pages);
- spin_unlock(&dm->lock);
-
- if (ret == -EAGAIN) {
- page_idx++;
- continue;
- }
-
- return ret;
- }
-
- *addr = dev->bar_addr +
- MLX5_GET64(alloc_memic_out, out, memic_start_addr);
-
- return 0;
- }
-
- return -ENOMEM;
-}
-
-void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
- u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
- u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {};
- u64 start_page_idx;
- int err;
-
- addr -= dev->bar_addr;
- start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
-
- MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
- MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
- MLX5_SET(dealloc_memic_in, in, memic_size, length);
-
- err = mlx5_cmd_exec_in(dev, dealloc_memic, in);
- if (err)
- return;
-
- spin_lock(&dm->lock);
- bitmap_clear(dm->memic_alloc_pages,
- start_page_idx, num_pages);
- spin_unlock(&dm->lock);
-}
-
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
{
u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 88ea6ef8f2cb..66c96292ed43 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -41,9 +41,6 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out);
-int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
- u64 length, u32 alignment);
-void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index b9291e482428..0b61df52332a 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -267,7 +267,7 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
}
}
-static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
+static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
int offset, u32 *var)
{
int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
@@ -304,7 +304,7 @@ alloc_err:
return err;
}
-static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
+static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
int offset, u32 var)
{
int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
@@ -397,7 +397,7 @@ static const struct file_operations dbg_cc_fops = {
.read = get_param,
};
-void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
{
if (!mlx5_debugfs_root ||
!dev->port[port_num].dbg_cc_params ||
@@ -409,7 +409,7 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
dev->port[port_num].dbg_cc_params = NULL;
}
-void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
+void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
{
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
struct mlx5_core_dev *mdev;
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 084652e2b15a..e365341057cb 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -139,7 +139,7 @@ static int mlx5_ib_create_counters(struct ib_counters *counters,
static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
- u8 port_num)
+ u32 port_num)
{
return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
&dev->port[port_num].cnts;
@@ -154,7 +154,7 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
* device port combination in switchdev and non switchdev mode of the
* parent device.
*/
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
{
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
@@ -162,7 +162,7 @@ u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
}
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
+ u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts;
@@ -236,13 +236,13 @@ free:
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port_num, int index)
+ u32 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
struct mlx5_core_dev *mdev;
int ret, num_counters;
- u8 mdev_port_num;
+ u32 mdev_port_num;
if (!stats)
return -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h
index 1aa30c2f3f4d..6bcaaa52e2b2 100644
--- a/drivers/infiniband/hw/mlx5/counters.h
+++ b/drivers/infiniband/hw/mlx5/counters.h
@@ -13,5 +13,5 @@ void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_clear_description(struct ib_counters *counters);
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
struct mlx5_ib_create_flow *ucmd);
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num);
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num);
#endif /* _MLX5_IB_COUNTERS_H */
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 07b8350929cd..a0b677accd96 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2185,27 +2185,69 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
return 0;
}
+static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem,
+ unsigned long pgsz_bitmap)
+{
+ unsigned long page_size;
+
+ /* Don't bother checking larger page sizes as offset must be zero and
+ * total DEVX umem length must be equal to total umem length.
+ */
+ pgsz_bitmap &= GENMASK_ULL(max_t(u64, order_base_2(umem->length),
+ PAGE_SHIFT),
+ MLX5_ADAPTER_PAGE_SHIFT);
+ if (!pgsz_bitmap)
+ return 0;
+
+ page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, U64_MAX);
+ if (!page_size)
+ return 0;
+
+ /* If the page_size is less than the CPU page size then we can use the
+ * offset and create a umem which is a subset of the page list.
+ * For larger page sizes we can't be sure the DMA list reflects the
+ * VA so we must ensure that the umem extent is exactly equal to the
+ * page list. Reduce the page size until one of these cases is true.
+ */
+ while ((ib_umem_dma_offset(umem, page_size) != 0 ||
+ (umem->length % page_size) != 0) &&
+ page_size > PAGE_SIZE)
+ page_size /= 2;
+
+ return page_size;
+}
+
static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
struct uverbs_attr_bundle *attrs,
struct devx_umem *obj,
struct devx_umem_reg_cmd *cmd)
{
+ unsigned long pgsz_bitmap;
unsigned int page_size;
__be64 *mtt;
void *umem;
+ int ret;
/*
- * We don't know what the user intends to use this umem for, but the HW
- * restrictions must be met. MR, doorbell records, QP, WQ and CQ all
- * have different requirements. Since we have no idea how to sort this
- * out, only support PAGE_SIZE with the expectation that userspace will
- * provide the necessary alignments inside the known PAGE_SIZE and that
- * FW will check everything.
+ * If the user does not pass in pgsz_bitmap then the user promises not
+ * to use umem_offset!=0 in any commands that allocate on top of the
+ * umem.
+ *
+ * If the user wants to use a umem_offset then it must pass in
+ * pgsz_bitmap which guides the maximum page size and thus maximum
+ * object alignment inside the umem. See the PRM.
+ *
+ * Users are not allowed to use IOVA here, mkeys are not supported on
+ * umem.
*/
- page_size = ib_umem_find_best_pgoff(
- obj->umem, PAGE_SIZE,
- __mlx5_page_offset_to_bitmask(__mlx5_bit_sz(umem, page_offset),
- 0));
+ ret = uverbs_get_const_default(&pgsz_bitmap, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
+ GENMASK_ULL(63,
+ min(PAGE_SHIFT, MLX5_ADAPTER_PAGE_SHIFT)));
+ if (ret)
+ return ret;
+
+ page_size = devx_umem_find_best_pgsize(obj->umem, pgsz_bitmap);
if (!page_size)
return -EINVAL;
@@ -2791,6 +2833,8 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_MANDATORY),
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
enum ib_access_flags),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
+ u64),
UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY));
diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
new file mode 100644
index 000000000000..094bf85589db
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -0,0 +1,587 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2021, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "dm.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
+ u64 length, u32 alignment)
+{
+ struct mlx5_core_dev *dev = dm->dev;
+ u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
+ >> PAGE_SHIFT;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
+ u32 mlx5_alignment;
+ u64 page_idx = 0;
+ int ret = 0;
+
+ if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
+ return -EINVAL;
+
+ /* mlx5 device sets alignment as 64*2^driver_value
+ * so normalizing is needed.
+ */
+ mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
+ alignment - MLX5_MEMIC_BASE_ALIGN;
+ if (mlx5_alignment > max_alignment)
+ return -EINVAL;
+
+ MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
+ MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
+ MLX5_SET(alloc_memic_in, in, memic_size, length);
+ MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
+ mlx5_alignment);
+
+ while (page_idx < num_memic_hw_pages) {
+ spin_lock(&dm->lock);
+ page_idx = bitmap_find_next_zero_area(dm->memic_alloc_pages,
+ num_memic_hw_pages,
+ page_idx,
+ num_pages, 0);
+
+ if (page_idx < num_memic_hw_pages)
+ bitmap_set(dm->memic_alloc_pages,
+ page_idx, num_pages);
+
+ spin_unlock(&dm->lock);
+
+ if (page_idx >= num_memic_hw_pages)
+ break;
+
+ MLX5_SET64(alloc_memic_in, in, range_start_addr,
+ hw_start_addr + (page_idx * PAGE_SIZE));
+
+ ret = mlx5_cmd_exec_inout(dev, alloc_memic, in, out);
+ if (ret) {
+ spin_lock(&dm->lock);
+ bitmap_clear(dm->memic_alloc_pages,
+ page_idx, num_pages);
+ spin_unlock(&dm->lock);
+
+ if (ret == -EAGAIN) {
+ page_idx++;
+ continue;
+ }
+
+ return ret;
+ }
+
+ *addr = dev->bar_addr +
+ MLX5_GET64(alloc_memic_out, out, memic_start_addr);
+
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr,
+ u64 length)
+{
+ struct mlx5_core_dev *dev = dm->dev;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {};
+ u64 start_page_idx;
+ int err;
+
+ addr -= dev->bar_addr;
+ start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
+
+ MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
+ MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
+ MLX5_SET(dealloc_memic_in, in, memic_size, length);
+
+ err = mlx5_cmd_exec_in(dev, dealloc_memic, in);
+ if (err)
+ return;
+
+ spin_lock(&dm->lock);
+ bitmap_clear(dm->memic_alloc_pages,
+ start_page_idx, num_pages);
+ spin_unlock(&dm->lock);
+}
+
+void mlx5_cmd_dealloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
+ u8 operation)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_memic_in)] = {};
+ struct mlx5_core_dev *dev = dm->dev;
+
+ MLX5_SET(modify_memic_in, in, opcode, MLX5_CMD_OP_MODIFY_MEMIC);
+ MLX5_SET(modify_memic_in, in, op_mod, MLX5_MODIFY_MEMIC_OP_MOD_DEALLOC);
+ MLX5_SET(modify_memic_in, in, memic_operation_type, operation);
+ MLX5_SET64(modify_memic_in, in, memic_start_addr, addr - dev->bar_addr);
+
+ mlx5_cmd_exec_in(dev, modify_memic, in);
+}
+
+static int mlx5_cmd_alloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
+ u8 operation, phys_addr_t *op_addr)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_memic_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_memic_in)] = {};
+ struct mlx5_core_dev *dev = dm->dev;
+ int err;
+
+ MLX5_SET(modify_memic_in, in, opcode, MLX5_CMD_OP_MODIFY_MEMIC);
+ MLX5_SET(modify_memic_in, in, op_mod, MLX5_MODIFY_MEMIC_OP_MOD_ALLOC);
+ MLX5_SET(modify_memic_in, in, memic_operation_type, operation);
+ MLX5_SET64(modify_memic_in, in, memic_start_addr, addr - dev->bar_addr);
+
+ err = mlx5_cmd_exec_inout(dev, modify_memic, in, out);
+ if (err)
+ return err;
+
+ *op_addr = dev->bar_addr +
+ MLX5_GET64(modify_memic_out, out, memic_operation_addr);
+ return 0;
+}
+
+static int add_dm_mmap_entry(struct ib_ucontext *context,
+ struct mlx5_user_mmap_entry *mentry, u8 mmap_flag,
+ size_t size, u64 address)
+{
+ mentry->mmap_flag = mmap_flag;
+ mentry->address = address;
+
+ return rdma_user_mmap_entry_insert_range(
+ context, &mentry->rdma_entry, size,
+ MLX5_IB_MMAP_DEVICE_MEM << 16,
+ (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
+}
+
+static void mlx5_ib_dm_memic_free(struct kref *kref)
+{
+ struct mlx5_ib_dm_memic *dm =
+ container_of(kref, struct mlx5_ib_dm_memic, ref);
+ struct mlx5_ib_dev *dev = to_mdev(dm->base.ibdm.device);
+
+ mlx5_cmd_dealloc_memic(&dev->dm, dm->base.dev_addr, dm->base.size);
+ kfree(dm);
+}
+
+static int copy_op_to_user(struct mlx5_ib_dm_op_entry *op_entry,
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 start_offset;
+ u16 page_idx;
+ int err;
+
+ page_idx = op_entry->mentry.rdma_entry.start_pgoff & 0xFFFF;
+ start_offset = op_entry->op_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ return err;
+
+ return uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+}
+
+static int map_existing_op(struct mlx5_ib_dm_memic *dm, u8 op,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dm_op_entry *op_entry;
+
+ op_entry = xa_load(&dm->ops, op);
+ if (!op_entry)
+ return -ENOENT;
+
+ return copy_op_to_user(op_entry, attrs);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_MAP_OP_ADDR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE);
+ struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+ struct ib_dm *ibdm = uobj->object;
+ struct mlx5_ib_dm_memic *dm = to_memic(ibdm);
+ struct mlx5_ib_dm_op_entry *op_entry;
+ int err;
+ u8 op;
+
+ err = uverbs_copy_from(&op, attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP);
+ if (err)
+ return err;
+
+ if (!(MLX5_CAP_DEV_MEM(dev->mdev, memic_operations) & BIT(op)))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&dm->ops_xa_lock);
+ err = map_existing_op(dm, op, attrs);
+ if (!err || err != -ENOENT)
+ goto err_unlock;
+
+ op_entry = kzalloc(sizeof(*op_entry), GFP_KERNEL);
+ if (!op_entry)
+ goto err_unlock;
+
+ err = mlx5_cmd_alloc_memic_op(&dev->dm, dm->base.dev_addr, op,
+ &op_entry->op_addr);
+ if (err) {
+ kfree(op_entry);
+ goto err_unlock;
+ }
+ op_entry->op = op;
+ op_entry->dm = dm;
+
+ err = add_dm_mmap_entry(uobj->context, &op_entry->mentry,
+ MLX5_IB_MMAP_TYPE_MEMIC_OP, dm->base.size,
+ op_entry->op_addr & PAGE_MASK);
+ if (err) {
+ mlx5_cmd_dealloc_memic_op(&dev->dm, dm->base.dev_addr, op);
+ kfree(op_entry);
+ goto err_unlock;
+ }
+ /* From this point, entry will be freed by mmap_free */
+ kref_get(&dm->ref);
+
+ err = copy_op_to_user(op_entry, attrs);
+ if (err)
+ goto err_remove;
+
+ err = xa_insert(&dm->ops, op, op_entry, GFP_KERNEL);
+ if (err)
+ goto err_remove;
+ mutex_unlock(&dm->ops_xa_lock);
+
+ return 0;
+
+err_remove:
+ rdma_user_mmap_entry_remove(&op_entry->mentry.rdma_entry);
+err_unlock:
+ mutex_unlock(&dm->ops_xa_lock);
+
+ return err;
+}
+
+static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
+ struct mlx5_ib_dm_memic *dm;
+ u64 start_offset;
+ u16 page_idx;
+ int err;
+ u64 address;
+
+ if (!MLX5_CAP_DEV_MEM(dm_db->dev, memic))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ dm->base.type = MLX5_IB_UAPI_DM_TYPE_MEMIC;
+ dm->base.size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
+ dm->base.ibdm.device = ctx->device;
+
+ kref_init(&dm->ref);
+ xa_init(&dm->ops);
+ mutex_init(&dm->ops_xa_lock);
+ dm->req_length = attr->length;
+
+ err = mlx5_cmd_alloc_memic(dm_db, &dm->base.dev_addr,
+ dm->base.size, attr->alignment);
+ if (err) {
+ kfree(dm);
+ return ERR_PTR(err);
+ }
+
+ address = dm->base.dev_addr & PAGE_MASK;
+ err = add_dm_mmap_entry(ctx, &dm->mentry, MLX5_IB_MMAP_TYPE_MEMIC,
+ dm->base.size, address);
+ if (err) {
+ mlx5_cmd_dealloc_memic(dm_db, dm->base.dev_addr, dm->base.size);
+ kfree(dm);
+ return ERR_PTR(err);
+ }
+
+ page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ goto err_copy;
+
+ start_offset = dm->base.dev_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ goto err_copy;
+
+ return &dm->base.ibdm;
+
+err_copy:
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
+ return ERR_PTR(err);
+}
+
+static enum mlx5_sw_icm_type get_icm_type(int uapi_type)
+{
+ return uapi_type == MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM ?
+ MLX5_SW_ICM_TYPE_STEERING :
+ MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+}
+
+static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs,
+ int type)
+{
+ struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
+ enum mlx5_sw_icm_type icm_type = get_icm_type(type);
+ struct mlx5_ib_dm_icm *dm;
+ u64 act_size;
+ int err;
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ dm->base.type = type;
+ dm->base.ibdm.device = ctx->device;
+
+ if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW)) {
+ err = -EPERM;
+ goto free;
+ }
+
+ if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ /* Allocation size must a multiple of the basic block size
+ * and a power of 2.
+ */
+ act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+ act_size = roundup_pow_of_two(act_size);
+
+ dm->base.size = act_size;
+ err = mlx5_dm_sw_icm_alloc(dev, icm_type, act_size, attr->alignment,
+ to_mucontext(ctx)->devx_uid,
+ &dm->base.dev_addr, &dm->obj_id);
+ if (err)
+ goto free;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &dm->base.dev_addr, sizeof(dm->base.dev_addr));
+ if (err) {
+ mlx5_dm_sw_icm_dealloc(dev, icm_type, dm->base.size,
+ to_mucontext(ctx)->devx_uid,
+ dm->base.dev_addr, dm->obj_id);
+ goto free;
+ }
+ return &dm->base.ibdm;
+free:
+ kfree(dm);
+ return ERR_PTR(err);
+}
+
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ enum mlx5_ib_uapi_dm_type type;
+ int err;
+
+ err = uverbs_get_const_default(&type, attrs,
+ MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+ MLX5_IB_UAPI_DM_TYPE_MEMIC);
+ if (err)
+ return ERR_PTR(err);
+
+ mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
+ type, attr->length, attr->alignment);
+
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ return handle_alloc_dm_memic(context, attr, attrs);
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ return handle_alloc_dm_sw_icm(context, attr, attrs, type);
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ return handle_alloc_dm_sw_icm(context, attr, attrs, type);
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static void dm_memic_remove_ops(struct mlx5_ib_dm_memic *dm)
+{
+ struct mlx5_ib_dm_op_entry *entry;
+ unsigned long idx;
+
+ mutex_lock(&dm->ops_xa_lock);
+ xa_for_each(&dm->ops, idx, entry) {
+ xa_erase(&dm->ops, idx);
+ rdma_user_mmap_entry_remove(&entry->mentry.rdma_entry);
+ }
+ mutex_unlock(&dm->ops_xa_lock);
+}
+
+static void mlx5_dm_memic_dealloc(struct mlx5_ib_dm_memic *dm)
+{
+ dm_memic_remove_ops(dm);
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
+}
+
+static int mlx5_dm_icm_dealloc(struct mlx5_ib_ucontext *ctx,
+ struct mlx5_ib_dm_icm *dm)
+{
+ enum mlx5_sw_icm_type type = get_icm_type(dm->base.type);
+ struct mlx5_core_dev *dev = to_mdev(dm->base.ibdm.device)->mdev;
+ int err;
+
+ err = mlx5_dm_sw_icm_dealloc(dev, type, dm->base.size, ctx->devx_uid,
+ dm->base.dev_addr, dm->obj_id);
+ if (!err)
+ kfree(dm);
+ return 0;
+}
+
+static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dm *dm = to_mdm(ibdm);
+
+ switch (dm->type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ mlx5_dm_memic_dealloc(to_memic(ibdm));
+ return 0;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm));
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm *ibdm =
+ uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_DM_REQ_HANDLE);
+ struct mlx5_ib_dm *dm = to_mdm(ibdm);
+ struct mlx5_ib_dm_memic *memic;
+ u64 start_offset;
+ u16 page_idx;
+ int err;
+
+ if (dm->type != MLX5_IB_UAPI_DM_TYPE_MEMIC)
+ return -EOPNOTSUPP;
+
+ memic = to_memic(ibdm);
+ page_idx = memic->mentry.rdma_entry.start_pgoff & 0xFFFF;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ return err;
+
+ start_offset = memic->base.dev_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ return err;
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_DM_RESP_LENGTH,
+ &memic->req_length,
+ sizeof(memic->req_length));
+}
+
+void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
+ struct mlx5_user_mmap_entry *mentry)
+{
+ struct mlx5_ib_dm_op_entry *op_entry;
+ struct mlx5_ib_dm_memic *mdm;
+
+ switch (mentry->mmap_flag) {
+ case MLX5_IB_MMAP_TYPE_MEMIC:
+ mdm = container_of(mentry, struct mlx5_ib_dm_memic, mentry);
+ kref_put(&mdm->ref, mlx5_ib_dm_memic_free);
+ break;
+ case MLX5_IB_MMAP_TYPE_MEMIC_OP:
+ op_entry = container_of(mentry, struct mlx5_ib_dm_op_entry,
+ mentry);
+ mdm = op_entry->dm;
+ mlx5_cmd_dealloc_memic_op(&dev->dm, mdm->base.dev_addr,
+ op_entry->op);
+ kfree(op_entry);
+ kref_put(&mdm->ref, mlx5_ib_dm_memic_free);
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DM_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_DM_REQ_HANDLE, UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_DM_RESP_LENGTH,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_dm, UVERBS_OBJECT_DM, UVERBS_METHOD_DM_ALLOC,
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16), UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+ enum mlx5_ib_uapi_dm_type, UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DM_MAP_OP_ADDR,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP,
+ UVERBS_ATTR_TYPE(u8),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DM,
+ &UVERBS_METHOD(MLX5_IB_METHOD_DM_MAP_OP_ADDR),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DM_QUERY));
+
+const struct uapi_definition mlx5_ib_dm_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM),
+ {},
+};
+
+const struct ib_device_ops mlx5_ib_dev_dm_ops = {
+ .alloc_dm = mlx5_ib_alloc_dm,
+ .dealloc_dm = mlx5_ib_dealloc_dm,
+ .reg_dm_mr = mlx5_ib_reg_dm_mr,
+};
diff --git a/drivers/infiniband/hw/mlx5/dm.h b/drivers/infiniband/hw/mlx5/dm.h
new file mode 100644
index 000000000000..9674a80d8d70
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dm.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2021, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_DM_H
+#define _MLX5_IB_DM_H
+
+#include "mlx5_ib.h"
+
+extern const struct ib_device_ops mlx5_ib_dev_dm_ops;
+extern const struct uapi_definition mlx5_ib_dm_defs[];
+
+struct mlx5_ib_dm {
+ struct ib_dm ibdm;
+ u32 type;
+ phys_addr_t dev_addr;
+ size_t size;
+};
+
+struct mlx5_ib_dm_op_entry {
+ struct mlx5_user_mmap_entry mentry;
+ phys_addr_t op_addr;
+ struct mlx5_ib_dm_memic *dm;
+ u8 op;
+};
+
+struct mlx5_ib_dm_memic {
+ struct mlx5_ib_dm base;
+ struct mlx5_user_mmap_entry mentry;
+ struct xarray ops;
+ struct mutex ops_xa_lock;
+ struct kref ref;
+ size_t req_length;
+};
+
+struct mlx5_ib_dm_icm {
+ struct mlx5_ib_dm base;
+ u32 obj_id;
+};
+
+static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm, ibdm);
+}
+
+static inline struct mlx5_ib_dm_memic *to_memic(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm_memic, base.ibdm);
+}
+
+static inline struct mlx5_ib_dm_icm *to_icm(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm_icm, base.ibdm);
+}
+
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
+ struct mlx5_user_mmap_entry *mentry);
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr,
+ u64 length);
+void mlx5_cmd_dealloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
+ u8 operation);
+
+#endif /* _MLX5_IB_DM_H */
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 01370d9a871a..2fc6a60c4e77 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -1528,8 +1528,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
dst_num++;
}
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
- flow_context, flow_act,
+ handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
+ fs_matcher, flow_context, flow_act,
cmd_in, inlen, dst_num);
if (IS_ERR(handler)) {
@@ -1885,8 +1885,9 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
else
*dest_id = mqp->raw_packet_qp.rq.tirn;
*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
+ } else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
+ !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index db5de720bb12..b25e0b33a11a 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -29,7 +29,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
- int num_ports = mlx5_eswitch_get_total_vports(dev);
+ u32 num_ports = mlx5_eswitch_get_total_vports(dev);
const struct mlx5_ib_profile *profile;
struct mlx5_ib_dev *ibdev;
int vport_index;
@@ -110,7 +110,7 @@ struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
- u16 port)
+ u32 port)
{
struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
struct mlx5_eswitch_rep *rep;
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index ce1dcb105dbd..9c55e5c528b4 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -16,7 +16,7 @@ int mlx5r_rep_init(void);
void mlx5r_rep_cleanup(void);
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
- u16 port);
+ u32 port);
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
u16 vport_num);
#else /* CONFIG_MLX5_ESWITCH */
@@ -25,7 +25,7 @@ static inline void mlx5r_rep_cleanup(void) {}
static inline
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
- u16 port)
+ u32 port)
{
return NULL;
}
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index 46b2d370fb3f..f2f62875d072 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -48,7 +48,7 @@ static inline u32 mlx_to_net_policy(enum port_state_policy mlx_policy)
}
}
-int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *info)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -91,7 +91,7 @@ static inline enum port_state_policy net_to_mlx_policy(int policy)
}
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
- u8 port, int state)
+ u32 port, int state)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
@@ -119,7 +119,7 @@ out:
}
int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
- u8 port, struct ifla_vf_stats *stats)
+ u32 port, struct ifla_vf_stats *stats)
{
int out_sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
struct mlx5_core_dev *mdev;
@@ -149,7 +149,8 @@ ex:
return err;
}
-static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+static int set_vf_node_guid(struct ib_device *device, int vf, u32 port,
+ u64 guid)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
@@ -172,7 +173,8 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
return err;
}
-static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+static int set_vf_port_guid(struct ib_device *device, int vf, u32 port,
+ u64 guid)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
@@ -195,7 +197,7 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
return err;
}
-int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u32 port,
u64 guid, int type)
{
if (type == IFLA_VF_IB_NODE_GUID)
@@ -206,7 +208,7 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
return -EINVAL;
}
-int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
struct ifla_vf_guid *node_guid,
struct ifla_vf_guid *port_guid)
{
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 652c6ccf1881..ec242a5a17a3 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -42,7 +42,7 @@ enum {
MLX5_IB_VENDOR_CLASS2 = 0xa
};
-static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
+static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u32 port_num,
struct ib_mad *in_mad)
{
if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
@@ -52,7 +52,7 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
}
static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
- int ignore_bkey, u8 port, const struct ib_wc *in_wc,
+ int ignore_bkey, u32 port, const struct ib_wc *in_wc,
const struct ib_grh *in_grh, const void *in_mad,
void *response_mad)
{
@@ -147,12 +147,12 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
vl_15_dropped);
}
-static int process_pma_cmd(struct mlx5_ib_dev *dev, u8 port_num,
+static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
struct mlx5_core_dev *mdev;
bool native_port = true;
- u8 mdev_port_num;
+ u32 mdev_port_num;
void *out_cnt;
int err;
@@ -216,7 +216,7 @@ done:
return err;
}
-int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad *in, struct ib_mad *out,
size_t *out_mad_size, u16 *out_mad_pkey_index)
@@ -444,7 +444,7 @@ out:
return err;
}
-int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey)
{
struct ib_smp *in_mad = NULL;
@@ -473,7 +473,7 @@ out:
return err;
}
-int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
+int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
struct ib_smp *in_mad = NULL;
@@ -513,7 +513,7 @@ out:
return err;
}
-int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
+int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 7a7f6ccd02a5..6d1dd09a4388 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -34,6 +34,7 @@
#include "ib_rep.h"
#include "cmd.h"
#include "devx.h"
+#include "dm.h"
#include "fs.h"
#include "srq.h"
#include "qp.h"
@@ -42,6 +43,7 @@
#include "counters.h"
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
+#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/ib_umem_odp.h>
@@ -100,7 +102,7 @@ mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
}
static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+mlx5_ib_port_link_layer(struct ib_device *device, u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(device);
int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
@@ -109,7 +111,7 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
}
static int get_port_state(struct ib_device *ibdev,
- u8 port_num,
+ u32 port_num,
enum ib_port_state *state)
{
struct ib_port_attr attr;
@@ -124,7 +126,7 @@ static int get_port_state(struct ib_device *ibdev,
static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
struct net_device *ndev,
- u8 *port_num)
+ u32 *port_num)
{
struct net_device *rep_ndev;
struct mlx5_ib_port *port;
@@ -154,7 +156,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
{
struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
- u8 port_num = roce->native_port_num;
+ u32 port_num = roce->native_port_num;
struct mlx5_core_dev *mdev;
struct mlx5_ib_dev *ibdev;
@@ -233,7 +235,7 @@ done:
}
static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
- u8 port_num)
+ u32 port_num)
{
struct mlx5_ib_dev *ibdev = to_mdev(device);
struct net_device *ndev;
@@ -261,8 +263,8 @@ out:
}
struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
- u8 ib_port_num,
- u8 *native_port_num)
+ u32 ib_port_num,
+ u32 *native_port_num)
{
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
ib_port_num);
@@ -296,7 +298,7 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
return mdev;
}
-void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u8 port_num)
+void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u32 port_num)
{
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
port_num);
@@ -452,7 +454,7 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed,
active_width);
}
-static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -462,7 +464,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
enum ib_mtu ndev_ib_mtu;
bool put_mdev = true;
u32 eth_prot_oper;
- u8 mdev_port_num;
+ u32 mdev_port_num;
bool ext;
int err;
@@ -498,7 +500,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width, ext);
- if (!dev->is_rep && mlx5_is_roce_enabled(mdev)) {
+ if (!dev->is_rep && dev->mdev->roce.roce_en) {
u16 qkey_viol_cntr;
props->port_cap_flags |= IB_PORT_CM_SUP;
@@ -549,19 +551,19 @@ out:
return err;
}
-static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
+static int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
unsigned int index, const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
- enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
+ enum ib_gid_type gid_type;
u16 vlan_id = 0xffff;
u8 roce_version = 0;
u8 roce_l3_type = 0;
u8 mac[ETH_ALEN];
int ret;
+ gid_type = attr->gid_type;
if (gid) {
- gid_type = attr->gid_type;
ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
if (ret)
return ret;
@@ -573,7 +575,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
roce_version = MLX5_ROCE_VERSION_2;
- if (ipv6_addr_v4mapped((void *)gid))
+ if (gid && ipv6_addr_v4mapped((void *)gid))
roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
else
roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
@@ -600,7 +602,7 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
__always_unused void **context)
{
return set_roce_addr(to_mdev(attr->device), attr->port_num,
- attr->index, NULL, NULL);
+ attr->index, NULL, attr);
}
__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
@@ -1267,7 +1269,7 @@ static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
return 0;
}
-static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
+static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
@@ -1335,7 +1337,7 @@ out:
return err;
}
-int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
unsigned int count;
@@ -1380,13 +1382,13 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
return ret;
}
-static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
return mlx5_query_port_roce(ibdev, port, props);
}
-static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey)
{
/* Default special Pkey for representor device port as per the
@@ -1396,7 +1398,7 @@ static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
return 0;
}
-static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+static int mlx5_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
@@ -1415,13 +1417,13 @@ static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
}
-static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port,
+static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u32 port,
u16 index, u16 *pkey)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev;
bool put_mdev = true;
- u8 mdev_port_num;
+ u32 mdev_port_num;
int err;
mdev = mlx5_ib_get_native_port_mdev(dev, port, &mdev_port_num);
@@ -1442,7 +1444,7 @@ static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port,
return err;
}
-static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+static int mlx5_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey)
{
switch (mlx5_get_vport_access_method(ibdev)) {
@@ -1486,12 +1488,12 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
return err;
}
-static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
+static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u32 port_num, u32 mask,
u32 value)
{
struct mlx5_hca_vport_context ctx = {};
struct mlx5_core_dev *mdev;
- u8 mdev_port_num;
+ u32 mdev_port_num;
int err;
mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
@@ -1520,7 +1522,7 @@ out:
return err;
}
-static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+static int mlx5_ib_modify_port(struct ib_device *ibdev, u32 port, int mask,
struct ib_port_modify *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
@@ -1929,7 +1931,7 @@ uar_done:
print_lib_caps(dev, context->lib_caps);
if (mlx5_ib_lag_should_assign_affinity(dev)) {
- u8 port = mlx5_core_native_port_num(dev->mdev) - 1;
+ u32 port = mlx5_core_native_port_num(dev->mdev) - 1;
atomic_set(&context->tx_port_affinity,
atomic_add_return(
@@ -2087,14 +2089,11 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
struct mlx5_var_table *var_table = &dev->var_table;
- struct mlx5_ib_dm *mdm;
switch (mentry->mmap_flag) {
case MLX5_IB_MMAP_TYPE_MEMIC:
- mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
- mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
- mdm->size);
- kfree(mdm);
+ case MLX5_IB_MMAP_TYPE_MEMIC_OP:
+ mlx5_ib_dm_mmap_free(dev, mentry);
break;
case MLX5_IB_MMAP_TYPE_VAR:
mutex_lock(&var_table->bitmap_lock);
@@ -2219,19 +2218,6 @@ free_bfreg:
return err;
}
-static int add_dm_mmap_entry(struct ib_ucontext *context,
- struct mlx5_ib_dm *mdm,
- u64 address)
-{
- mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
- mdm->mentry.address = address;
- return rdma_user_mmap_entry_insert_range(
- context, &mdm->mentry.rdma_entry,
- mdm->size,
- MLX5_IB_MMAP_DEVICE_MEM << 16,
- (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
-}
-
static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
{
unsigned long idx;
@@ -2333,206 +2319,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
return 0;
}
-static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
- u32 type)
-{
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_MEMIC:
- if (!MLX5_CAP_DEV_MEM(dev->mdev, memic))
- return -EOPNOTSUPP;
- break;
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- if (!capable(CAP_SYS_RAWIO) ||
- !capable(CAP_NET_RAW))
- return -EPERM;
-
- if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner_v2)))
- return -EOPNOTSUPP;
- break;
- }
-
- return 0;
-}
-
-static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
- struct mlx5_ib_dm *dm,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
- u64 start_offset;
- u16 page_idx;
- int err;
- u64 address;
-
- dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
-
- err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr,
- dm->size, attr->alignment);
- if (err)
- return err;
-
- address = dm->dev_addr & PAGE_MASK;
- err = add_dm_mmap_entry(ctx, dm, address);
- if (err)
- goto err_dealloc;
-
- page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
- err = uverbs_copy_to(attrs,
- MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
- &page_idx,
- sizeof(page_idx));
- if (err)
- goto err_copy;
-
- start_offset = dm->dev_addr & ~PAGE_MASK;
- err = uverbs_copy_to(attrs,
- MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
- &start_offset, sizeof(start_offset));
- if (err)
- goto err_copy;
-
- return 0;
-
-err_copy:
- rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
-err_dealloc:
- mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
-
- return err;
-}
-
-static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
- struct mlx5_ib_dm *dm,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs,
- int type)
-{
- struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
- u64 act_size;
- int err;
-
- /* Allocation size must a multiple of the basic block size
- * and a power of 2.
- */
- act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
- act_size = roundup_pow_of_two(act_size);
-
- dm->size = act_size;
- err = mlx5_dm_sw_icm_alloc(dev, type, act_size, attr->alignment,
- to_mucontext(ctx)->devx_uid, &dm->dev_addr,
- &dm->icm_dm.obj_id);
- if (err)
- return err;
-
- err = uverbs_copy_to(attrs,
- MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
- &dm->dev_addr, sizeof(dm->dev_addr));
- if (err)
- mlx5_dm_sw_icm_dealloc(dev, type, dm->size,
- to_mucontext(ctx)->devx_uid, dm->dev_addr,
- dm->icm_dm.obj_id);
-
- return err;
-}
-
-struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_dm *dm;
- enum mlx5_ib_uapi_dm_type type;
- int err;
-
- err = uverbs_get_const_default(&type, attrs,
- MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
- MLX5_IB_UAPI_DM_TYPE_MEMIC);
- if (err)
- return ERR_PTR(err);
-
- mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
- type, attr->length, attr->alignment);
-
- err = check_dm_type_support(to_mdev(ibdev), type);
- if (err)
- return ERR_PTR(err);
-
- dm = kzalloc(sizeof(*dm), GFP_KERNEL);
- if (!dm)
- return ERR_PTR(-ENOMEM);
-
- dm->type = type;
-
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_MEMIC:
- err = handle_alloc_dm_memic(context, dm,
- attr,
- attrs);
- break;
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- err = handle_alloc_dm_sw_icm(context, dm,
- attr, attrs,
- MLX5_SW_ICM_TYPE_STEERING);
- break;
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- err = handle_alloc_dm_sw_icm(context, dm,
- attr, attrs,
- MLX5_SW_ICM_TYPE_HEADER_MODIFY);
- break;
- default:
- err = -EOPNOTSUPP;
- }
-
- if (err)
- goto err_free;
-
- return &dm->ibdm;
-
-err_free:
- kfree(dm);
- return ERR_PTR(err);
-}
-
-int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
- &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
- struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
- struct mlx5_ib_dm *dm = to_mdm(ibdm);
- int ret;
-
- switch (dm->type) {
- case MLX5_IB_UAPI_DM_TYPE_MEMIC:
- rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
- return 0;
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
- dm->size, ctx->devx_uid, dm->dev_addr,
- dm->icm_dm.obj_id);
- if (ret)
- return ret;
- break;
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY,
- dm->size, ctx->devx_uid, dm->dev_addr,
- dm->icm_dm.obj_id);
- if (ret)
- return ret;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- kfree(dm);
-
- return 0;
-}
-
static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct mlx5_ib_pd *pd = to_mpd(ibpd);
@@ -2779,7 +2565,7 @@ static void delay_drop_handler(struct work_struct *work)
static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
struct ib_event *ibev)
{
- u8 port = (eqe->data.port.port >> 4) & 0xf;
+ u32 port = (eqe->data.port.port >> 4) & 0xf;
switch (eqe->sub_type) {
case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
@@ -2795,7 +2581,7 @@ static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe
static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
struct ib_event *ibev)
{
- u8 port = (eqe->data.port.port >> 4) & 0xf;
+ u32 port = (eqe->data.port.port >> 4) & 0xf;
ibev->element.port_num = port;
@@ -3152,7 +2938,7 @@ static u32 get_core_cap_flags(struct ib_device *ibdev,
return ret;
}
-static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
+static int mlx5_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
@@ -3180,7 +2966,7 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return 0;
}
-static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
@@ -3252,7 +3038,7 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
+static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
{
int err;
@@ -3266,7 +3052,7 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
return 0;
}
-static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
+static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
{
if (dev->port[port_num].roce.nb.notifier_call) {
unregister_netdevice_notifier(&dev->port[port_num].roce.nb);
@@ -3300,7 +3086,7 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
+static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params)
{
@@ -3352,7 +3138,7 @@ static const struct file_operations fops_delay_drop_timeout = {
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
- u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
struct mlx5_ib_port *port = &ibdev->port[port_num];
int comps;
int err;
@@ -3398,7 +3184,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev);
- mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1);
+ mlx5_ib_dbg(ibdev, "unaffiliated port %u\n", port_num + 1);
/* Log an error, still needed to cleanup the pointers and add
* it back to the list.
*/
@@ -3412,14 +3198,14 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
- u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
int err;
lockdep_assert_held(&mlx5_ib_multiport_mutex);
spin_lock(&ibdev->port[port_num].mp.mpi_lock);
if (ibdev->port[port_num].mp.mpi) {
- mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
+ mlx5_ib_dbg(ibdev, "port %u already affiliated.\n",
port_num + 1);
spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
return false;
@@ -3455,12 +3241,12 @@ unbind:
static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
{
- int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
port_num + 1);
struct mlx5_ib_multiport_info *mpi;
int err;
- int i;
+ u32 i;
if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
return 0;
@@ -3523,10 +3309,10 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
{
- int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
port_num + 1);
- int i;
+ u32 i;
if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
return;
@@ -3539,7 +3325,8 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
kfree(dev->port[i].mp.mpi);
dev->port[i].mp.mpi = NULL;
} else {
- mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1);
+ mlx5_ib_dbg(dev, "unbinding port_num: %u\n",
+ i + 1);
mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi);
}
}
@@ -3816,20 +3603,6 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR,
&UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY));
ADD_UVERBS_ATTRIBUTES_SIMPLE(
- mlx5_ib_dm,
- UVERBS_OBJECT_DM,
- UVERBS_METHOD_DM_ALLOC,
- UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
- UVERBS_ATTR_TYPE(u64),
- UA_MANDATORY),
- UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
- UVERBS_ATTR_TYPE(u16),
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
- enum mlx5_ib_uapi_dm_type,
- UA_OPTIONAL));
-
-ADD_UVERBS_ATTRIBUTES_SIMPLE(
mlx5_ib_flow_action,
UVERBS_OBJECT_FLOW_ACTION,
UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
@@ -3851,10 +3624,10 @@ static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_dm_defs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
&mlx5_ib_flow_action),
- UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
@@ -3891,8 +3664,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->port[i].roce.last_port_state = IB_PORT_DOWN;
}
- mlx5_ib_internal_fill_odp_caps(dev);
-
err = mlx5_ib_init_multiport_master(dev);
if (err)
return err;
@@ -4032,12 +3803,6 @@ static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd),
};
-static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
- .alloc_dm = mlx5_ib_alloc_dm,
- .dealloc_dm = mlx5_ib_dealloc_dm,
- .reg_dm_mr = mlx5_ib_reg_dm_mr,
-};
-
static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -4160,7 +3925,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
- u8 port_num = 0;
+ u32 port_num = 0;
int err;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
@@ -4173,7 +3938,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
/* Register only for native ports */
err = mlx5_add_netdev_notifier(dev, port_num);
- if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev))
+ if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev))
/*
* We don't enable ETH interface for
* 1. IB representors
@@ -4197,7 +3962,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
- u8 port_num;
+ u32 port_num;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
@@ -4710,7 +4475,7 @@ static int mlx5r_probe(struct auxiliary_device *adev,
dev->mdev = mdev;
dev->num_ports = num_ports;
- if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_enabled(mdev))
+ if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev))
profile = &raw_eth_profile;
else
profile = &pf_profile;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 88cc26e008fc..e9a3f34a30b8 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -166,6 +166,7 @@ enum mlx5_ib_mmap_type {
MLX5_IB_MMAP_TYPE_VAR = 2,
MLX5_IB_MMAP_TYPE_UAR_WC = 3,
MLX5_IB_MMAP_TYPE_UAR_NC = 4,
+ MLX5_IB_MMAP_TYPE_MEMIC_OP = 5,
};
struct mlx5_bfreg_info {
@@ -406,7 +407,7 @@ struct mlx5_ib_qp_base {
struct mlx5_ib_qp_trans {
struct mlx5_ib_qp_base base;
u16 xrcdn;
- u8 alt_port;
+ u32 alt_port;
u8 atomic_rd_en;
u8 resp_depth;
};
@@ -453,7 +454,7 @@ struct mlx5_ib_dct {
struct mlx5_ib_gsi_qp {
struct ib_qp *rx_qp;
- u8 port_num;
+ u32 port_num;
struct ib_qp_cap cap;
struct ib_cq *cq;
struct mlx5_ib_gsi_wr *outstanding_wrs;
@@ -490,7 +491,7 @@ struct mlx5_ib_qp {
struct mutex mutex;
/* cached variant of create_flags from struct ib_qp_init_attr */
u32 flags;
- u8 port;
+ u32 port;
u8 state;
int max_inline_data;
struct mlx5_bf bf;
@@ -547,11 +548,6 @@ static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
return container_of(wr, struct mlx5_umr_wr, wr);
}
-struct mlx5_shared_mr_info {
- int mr_id;
- struct ib_umem *umem;
-};
-
enum mlx5_ib_cq_pr_flags {
MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0,
};
@@ -623,20 +619,6 @@ struct mlx5_user_mmap_entry {
u32 page_idx;
};
-struct mlx5_ib_dm {
- struct ib_dm ibdm;
- phys_addr_t dev_addr;
- u32 type;
- size_t size;
- union {
- struct {
- u32 obj_id;
- } icm_dm;
- /* other dm types specific params should be added here */
- };
- struct mlx5_user_mmap_entry mentry;
-};
-
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
#define MLX5_IB_DM_MEMIC_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\
@@ -654,47 +636,69 @@ struct mlx5_ib_dm {
atomic64_add(value, &((mr)->odp_stats.counter_name))
struct mlx5_ib_mr {
- struct ib_mr ibmr;
- void *descs;
- dma_addr_t desc_map;
- int ndescs;
- int data_length;
- int meta_ndescs;
- int meta_length;
- int max_descs;
- int desc_size;
- int access_mode;
- unsigned int page_shift;
- struct mlx5_core_mkey mmkey;
- struct ib_umem *umem;
- struct mlx5_shared_mr_info *smr_info;
- struct list_head list;
- struct mlx5_cache_ent *cache_ent;
- u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
- struct mlx5_core_sig_ctx *sig;
- void *descs_alloc;
- int access_flags; /* Needed for rereg MR */
-
- struct mlx5_ib_mr *parent;
- /* Needed for IB_MR_TYPE_INTEGRITY */
- struct mlx5_ib_mr *pi_mr;
- struct mlx5_ib_mr *klm_mr;
- struct mlx5_ib_mr *mtt_mr;
- u64 data_iova;
- u64 pi_iova;
-
- /* For ODP and implicit */
- struct xarray implicit_children;
+ struct ib_mr ibmr;
+ struct mlx5_core_mkey mmkey;
+
+ /* User MR data */
+ struct mlx5_cache_ent *cache_ent;
+ struct ib_umem *umem;
+
+ /* This is zero'd when the MR is allocated */
union {
- struct list_head elm;
- struct work_struct work;
- } odp_destroy;
- struct ib_odp_counters odp_stats;
- bool is_odp_implicit;
+ /* Used only while the MR is in the cache */
+ struct {
+ u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
+ struct mlx5_async_work cb_work;
+ /* Cache list element */
+ struct list_head list;
+ };
- struct mlx5_async_work cb_work;
+ /* Used only by kernel MRs (umem == NULL) */
+ struct {
+ void *descs;
+ void *descs_alloc;
+ dma_addr_t desc_map;
+ int max_descs;
+ int ndescs;
+ int desc_size;
+ int access_mode;
+
+ /* For Kernel IB_MR_TYPE_INTEGRITY */
+ struct mlx5_core_sig_ctx *sig;
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr *klm_mr;
+ struct mlx5_ib_mr *mtt_mr;
+ u64 data_iova;
+ u64 pi_iova;
+ int meta_ndescs;
+ int meta_length;
+ int data_length;
+ };
+
+ /* Used only by User MRs (umem != NULL) */
+ struct {
+ unsigned int page_shift;
+ /* Current access_flags */
+ int access_flags;
+
+ /* For User ODP */
+ struct mlx5_ib_mr *parent;
+ struct xarray implicit_children;
+ union {
+ struct work_struct work;
+ } odp_destroy;
+ struct ib_odp_counters odp_stats;
+ bool is_odp_implicit;
+ };
+ };
};
+/* Zero the fields in the mr that are variant depending on usage */
+static inline void mlx5_clear_mr(struct mlx5_ib_mr *mr)
+{
+ memset(mr->out, 0, sizeof(*mr) - offsetof(struct mlx5_ib_mr, out));
+}
+
static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
{
return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
@@ -822,7 +826,7 @@ struct mlx5_roce {
atomic_t tx_port_affinity;
enum ib_port_state last_port_state;
struct mlx5_ib_dev *dev;
- u8 native_port_num;
+ u32 native_port_num;
};
struct mlx5_ib_port {
@@ -837,7 +841,7 @@ struct mlx5_ib_dbg_param {
int offset;
struct mlx5_ib_dev *dev;
struct dentry *dentry;
- u8 port_num;
+ u32 port_num;
};
enum mlx5_ib_dbg_cc_types {
@@ -1063,6 +1067,7 @@ struct mlx5_ib_dev {
struct mutex slow_path_mutex;
struct ib_odp_caps odp_caps;
u64 odp_max_size;
+ struct mutex odp_eq_mutex;
struct mlx5_ib_pf_eq odp_pf_eq;
struct xarray odp_mkeys;
@@ -1170,11 +1175,6 @@ static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
return container_of(msrq, struct mlx5_ib_srq, msrq);
}
-static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm)
-{
- return container_of(ibdm, struct mlx5_ib_dm, ibdm);
-}
-
static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mlx5_ib_mr, ibmr);
@@ -1268,8 +1268,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct ib_udata *udata,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
-void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr);
-void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr);
+void mlx5_ib_free_odp_mr(struct mlx5_ib_mr *mr);
struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
@@ -1285,7 +1284,7 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
int data_sg_nents, unsigned int *data_sg_offset,
struct scatterlist *meta_sg, int meta_sg_nents,
unsigned int *meta_sg_offset);
-int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad *in, struct ib_mad *out,
size_t *out_mad_size, u16 *out_mad_pkey_index);
@@ -1300,13 +1299,13 @@ int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev,
u32 *vendor_id);
int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc);
int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid);
-int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey);
-int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
+int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid);
-int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
+int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
-int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
u64 access_flags);
@@ -1317,8 +1316,6 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
unsigned int entry, int access_flags);
-void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
-int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
@@ -1332,18 +1329,13 @@ int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
struct ib_rwq_ind_table_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
-struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs);
-int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs);
struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
@@ -1357,12 +1349,12 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
-static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_pf_eq *eq)
{
- return;
+ return 0;
}
-
-static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
@@ -1397,22 +1389,22 @@ int __mlx5_ib_add(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile);
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
- u8 port, struct ifla_vf_info *info);
+ u32 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
- u8 port, int state);
+ u32 port, int state);
int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
- u8 port, struct ifla_vf_stats *stats);
-int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+ u32 port, struct ifla_vf_stats *stats);
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
struct ifla_vf_guid *node_guid,
struct ifla_vf_guid *port_guid);
-int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u32 port,
u64 guid, int type);
__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
const struct ib_gid_attr *attr);
-void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
-void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num);
+void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num);
/* GSI QP helper functions */
int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
@@ -1435,10 +1427,10 @@ void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi,
int bfregn);
struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi);
struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
- u8 ib_port_num,
- u8 *native_port_num);
+ u32 ib_port_num,
+ u32 *native_port_num);
void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
- u8 port_num);
+ u32 port_num);
extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index db05b0e0a8d7..4388afeff251 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -42,6 +42,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
+#include "dm.h"
#include "mlx5_ib.h"
/*
@@ -119,8 +120,6 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
create_mkey_callback, context);
}
-static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
-static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
@@ -590,6 +589,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
ent->available_mrs--;
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
+
+ mlx5_clear_mr(mr);
}
mr->access_flags = access_flags;
return mr;
@@ -615,42 +616,20 @@ static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
ent->available_mrs--;
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
- break;
+ mlx5_clear_mr(mr);
+ return mr;
}
queue_adjust_cache_locked(ent);
spin_unlock_irq(&ent->lock);
}
-
- if (!mr)
- req_ent->miss++;
-
- return mr;
-}
-
-static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
-{
- struct mlx5_cache_ent *ent = mr->cache_ent;
-
- mr->cache_ent = NULL;
- spin_lock_irq(&ent->lock);
- ent->total_mrs--;
- spin_unlock_irq(&ent->lock);
+ req_ent->miss++;
+ return NULL;
}
-void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_cache_ent *ent = mr->cache_ent;
- if (!ent)
- return;
-
- if (mlx5_mr_cache_invalidate(mr)) {
- detach_mr_from_cache(mr);
- destroy_mkey(dev, mr);
- kfree(mr);
- return;
- }
-
spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
ent->available_mrs++;
@@ -993,8 +972,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
mr->ibmr.pd = pd;
mr->umem = umem;
- mr->access_flags = access_flags;
- mr->desc_size = sizeof(struct mlx5_mtt);
mr->mmkey.iova = iova;
mr->mmkey.size = umem->length;
mr->mmkey.pd = to_mpd(pd)->pdn;
@@ -1028,7 +1005,7 @@ static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
*/
might_sleep();
- gfp_mask |= __GFP_ZERO;
+ gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
/*
* If the system already has a suitable high order page then just use
@@ -1505,7 +1482,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
*/
err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
if (err) {
- dereg_mr(dev, mr);
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
return ERR_PTR(err);
}
}
@@ -1524,6 +1501,9 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
return ERR_PTR(-EOPNOTSUPP);
+ err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq);
+ if (err)
+ return ERR_PTR(err);
if (!start && length == U64_MAX) {
if (iova != 0)
return ERR_PTR(-EINVAL);
@@ -1562,7 +1542,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
return &mr->ibmr;
err_dereg_mr:
- dereg_mr(dev, mr);
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
return ERR_PTR(err);
}
@@ -1659,19 +1639,19 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
return &mr->ibmr;
err_dereg_mr:
- dereg_mr(dev, mr);
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
return ERR_PTR(err);
}
/**
- * mlx5_mr_cache_invalidate - Fence all DMA on the MR
+ * revoke_mr - Fence all DMA on the MR
* @mr: The MR to fence
*
* Upon return the NIC will not be doing any DMA to the pages under the MR,
- * and any DMA inprogress will be completed. Failure of this function
+ * and any DMA in progress will be completed. Failure of this function
* indicates the HW has failed catastrophically.
*/
-int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
+static int revoke_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_umr_wr umrwr = {};
@@ -1765,7 +1745,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
* with it. This ensure the change is atomic relative to any use of the
* MR.
*/
- err = mlx5_mr_cache_invalidate(mr);
+ err = revoke_mr(mr);
if (err)
return err;
@@ -1844,7 +1824,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
* Only one active MR can refer to a umem at one time, revoke
* the old MR before assigning the umem to the new one.
*/
- err = mlx5_mr_cache_invalidate(mr);
+ err = revoke_mr(mr);
if (err)
return ERR_PTR(err);
umem = mr->umem;
@@ -1931,7 +1911,7 @@ err:
static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
- if (mr->descs) {
+ if (!mr->umem && mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1943,69 +1923,82 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
}
}
-static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
- if (mr->sig) {
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+ int rc;
+
+ /*
+ * Any async use of the mr must hold the refcount, once the refcount
+ * goes to zero no other thread, such as ODP page faults, prefetch, any
+ * UMR activity, etc can touch the mkey. Thus it is safe to destroy it.
+ */
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
+ refcount_read(&mr->mmkey.usecount) != 0 &&
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)))
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
+
+ if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
+ xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), ibmr,
+ NULL, GFP_KERNEL);
+
+ if (mr->mtt_mr) {
+ rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
+ if (rc)
+ return rc;
+ mr->mtt_mr = NULL;
+ }
+ if (mr->klm_mr) {
+ rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
+ if (rc)
+ return rc;
+ mr->klm_mr = NULL;
+ }
+
if (mlx5_core_destroy_psv(dev->mdev,
mr->sig->psv_memory.psv_idx))
mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
mr->sig->psv_memory.psv_idx);
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_wire.psv_idx))
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
- xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key));
kfree(mr->sig);
mr->sig = NULL;
}
+ /* Stop DMA */
+ if (mr->cache_ent) {
+ if (revoke_mr(mr)) {
+ spin_lock_irq(&mr->cache_ent->lock);
+ mr->cache_ent->total_mrs--;
+ spin_unlock_irq(&mr->cache_ent->lock);
+ mr->cache_ent = NULL;
+ }
+ }
if (!mr->cache_ent) {
- destroy_mkey(dev, mr);
- mlx5_free_priv_descs(mr);
+ rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
+ if (rc)
+ return rc;
}
-}
-static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
-{
- struct ib_umem *umem = mr->umem;
+ if (mr->umem) {
+ bool is_odp = is_odp_mr(mr);
- /* Stop all DMA */
- if (is_odp_mr(mr))
- mlx5_ib_fence_odp_mr(mr);
- else if (is_dmabuf_mr(mr))
- mlx5_ib_fence_dmabuf_mr(mr);
- else
- clean_mr(dev, mr);
-
- if (umem) {
- if (!is_odp_mr(mr))
- atomic_sub(ib_umem_num_pages(umem),
+ if (!is_odp)
+ atomic_sub(ib_umem_num_pages(mr->umem),
&dev->mdev->priv.reg_pages);
- ib_umem_release(umem);
+ ib_umem_release(mr->umem);
+ if (is_odp)
+ mlx5_ib_free_odp_mr(mr);
}
- if (mr->cache_ent)
+ if (mr->cache_ent) {
mlx5_mr_cache_free(dev, mr);
- else
+ } else {
+ mlx5_free_priv_descs(mr);
kfree(mr);
-}
-
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
-{
- struct mlx5_ib_mr *mmr = to_mmr(ibmr);
-
- if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
- dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
- dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
}
-
- if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) {
- mlx5_ib_free_implicit_mr(mmr);
- return 0;
- }
-
- dereg_mr(to_mdev(ibmr->device), mmr);
-
return 0;
}
@@ -2177,10 +2170,10 @@ err_free_descs:
destroy_mkey(dev, mr);
mlx5_free_priv_descs(mr);
err_free_mtt_mr:
- dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
+ mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
mr->mtt_mr = NULL;
err_free_klm_mr:
- dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
+ mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
mr->klm_mr = NULL;
err_destroy_psv:
if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index b103555b1f5d..782b2af8f211 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -181,64 +181,29 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
}
}
-static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
-{
- struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
-
- /* Ensure mlx5_ib_invalidate_range() will not touch the MR any more */
- mutex_lock(&odp->umem_mutex);
- if (odp->npages) {
- mlx5_mr_cache_invalidate(mr);
- ib_umem_odp_unmap_dma_pages(odp, ib_umem_start(odp),
- ib_umem_end(odp));
- WARN_ON(odp->npages);
- }
- odp->private = NULL;
- mutex_unlock(&odp->umem_mutex);
-
- if (!mr->cache_ent) {
- mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey);
- WARN_ON(mr->descs);
- }
-}
-
/*
* This must be called after the mr has been removed from implicit_children.
* NOTE: The MR does not necessarily have to be
* empty here, parallel page faults could have raced with the free process and
* added pages to it.
*/
-static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
+static void free_implicit_child_mr_work(struct work_struct *work)
{
+ struct mlx5_ib_mr *mr =
+ container_of(work, struct mlx5_ib_mr, odp_destroy.work);
struct mlx5_ib_mr *imr = mr->parent;
struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
mlx5r_deref_wait_odp_mkey(&mr->mmkey);
- if (need_imr_xlt) {
- mutex_lock(&odp_imr->umem_mutex);
- mlx5_ib_update_xlt(mr->parent, idx, 1, 0,
- MLX5_IB_UPD_XLT_INDIRECT |
- MLX5_IB_UPD_XLT_ATOMIC);
- mutex_unlock(&odp_imr->umem_mutex);
- }
-
- dma_fence_odp_mr(mr);
-
- mr->parent = NULL;
- mlx5_mr_cache_free(mr_to_mdev(mr), mr);
- ib_umem_odp_release(odp);
-}
-
-static void free_implicit_child_mr_work(struct work_struct *work)
-{
- struct mlx5_ib_mr *mr =
- container_of(work, struct mlx5_ib_mr, odp_destroy.work);
- struct mlx5_ib_mr *imr = mr->parent;
+ mutex_lock(&odp_imr->umem_mutex);
+ mlx5_ib_update_xlt(mr->parent, ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT,
+ 1, 0,
+ MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
+ mutex_unlock(&odp_imr->umem_mutex);
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
- free_implicit_child_mr(mr, true);
mlx5r_deref_odp_mkey(&imr->mmkey);
}
@@ -352,7 +317,7 @@ const struct mmu_interval_notifier_ops mlx5_mn_ops = {
.invalidate = mlx5_ib_invalidate_range,
};
-void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
+static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
struct ib_odp_caps *caps = &dev->odp_caps;
@@ -455,8 +420,10 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
ret = mr = mlx5_mr_cache_alloc(
mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags);
- if (IS_ERR(mr))
- goto out_umem;
+ if (IS_ERR(mr)) {
+ ib_umem_odp_release(odp);
+ return mr;
+ }
mr->ibmr.pd = imr->ibmr.pd;
mr->ibmr.device = &mr_to_mdev(imr)->ib_dev;
@@ -506,9 +473,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
out_lock:
xa_unlock(&imr->implicit_children);
out_mr:
- mlx5_mr_cache_free(mr_to_mdev(imr), mr);
-out_umem:
- ib_umem_odp_release(odp);
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
return ret;
}
@@ -531,8 +496,8 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags);
if (IS_ERR(imr)) {
- err = PTR_ERR(imr);
- goto out_umem;
+ ib_umem_odp_release(umem_odp);
+ return imr;
}
imr->ibmr.pd = &pd->ibpd;
@@ -562,93 +527,22 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
return imr;
out_mr:
mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
- mlx5_mr_cache_free(dev, imr);
-out_umem:
- ib_umem_odp_release(umem_odp);
+ mlx5_ib_dereg_mr(&imr->ibmr, NULL);
return ERR_PTR(err);
}
-void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
+void mlx5_ib_free_odp_mr(struct mlx5_ib_mr *mr)
{
- struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
- struct mlx5_ib_dev *dev = mr_to_mdev(imr);
struct mlx5_ib_mr *mtt;
unsigned long idx;
- xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key));
- /*
- * All work on the prefetch list must be completed, xa_erase() prevented
- * new work from being created.
- */
- mlx5r_deref_wait_odp_mkey(&imr->mmkey);
- /*
- * At this point it is forbidden for any other thread to enter
- * pagefault_mr() on this imr. It is already forbidden to call
- * pagefault_mr() on an implicit child. Due to this additions to
- * implicit_children are prevented.
- * In addition, any new call to destroy_unused_implicit_child_mr()
- * may return immediately.
- */
-
/*
- * Fence the imr before we destroy the children. This allows us to
- * skip updating the XLT of the imr during destroy of the child mkey
- * the imr points to.
+ * If this is an implicit MR it is already invalidated so we can just
+ * delete the children mkeys.
*/
- mlx5_mr_cache_invalidate(imr);
-
- xa_for_each(&imr->implicit_children, idx, mtt) {
- xa_erase(&imr->implicit_children, idx);
- free_implicit_child_mr(mtt, false);
- }
-
- mlx5_mr_cache_free(dev, imr);
- ib_umem_odp_release(odp_imr);
-}
-
-/**
- * mlx5_ib_fence_odp_mr - Stop all access to the ODP MR
- * @mr: to fence
- *
- * On return no parallel threads will be touching this MR and no DMA will be
- * active.
- */
-void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
-{
- /* Prevent new page faults and prefetch requests from succeeding */
- xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
-
- /* Wait for all running page-fault handlers to finish. */
- mlx5r_deref_wait_odp_mkey(&mr->mmkey);
-
- dma_fence_odp_mr(mr);
-}
-
-/**
- * mlx5_ib_fence_dmabuf_mr - Stop all access to the dmabuf MR
- * @mr: to fence
- *
- * On return no parallel threads will be touching this MR and no DMA will be
- * active.
- */
-void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr)
-{
- struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
-
- /* Prevent new page faults and prefetch requests from succeeding */
- xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
-
- mlx5r_deref_wait_odp_mkey(&mr->mmkey);
-
- dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
- mlx5_mr_cache_invalidate(mr);
- umem_dmabuf->private = NULL;
- ib_umem_dmabuf_unmap_pages(umem_dmabuf);
- dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
-
- if (!mr->cache_ent) {
- mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey);
- WARN_ON(mr->descs);
+ xa_for_each(&mr->implicit_children, idx, mtt) {
+ xa_erase(&mr->implicit_children, idx);
+ mlx5_ib_dereg_mr(&mtt->ibmr, NULL);
}
}
@@ -1637,20 +1531,24 @@ enum {
MLX5_IB_NUM_PF_DRAIN = 64,
};
-static int
-mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
struct mlx5_eq_param param = {};
- int err;
+ int err = 0;
+ mutex_lock(&dev->odp_eq_mutex);
+ if (eq->core)
+ goto unlock;
INIT_WORK(&eq->work, mlx5_ib_eq_pf_action);
spin_lock_init(&eq->lock);
eq->dev = dev;
eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN,
sizeof(struct mlx5_pagefault));
- if (!eq->pool)
- return -ENOMEM;
+ if (!eq->pool) {
+ err = -ENOMEM;
+ goto unlock;
+ }
eq->wq = alloc_workqueue("mlx5_ib_page_fault",
WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
@@ -1661,7 +1559,7 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
}
eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
- param = (struct mlx5_eq_param) {
+ param = (struct mlx5_eq_param){
.irq_index = 0,
.nent = MLX5_IB_NUM_PF_EQE,
};
@@ -1677,21 +1575,27 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
goto err_eq;
}
+ mutex_unlock(&dev->odp_eq_mutex);
return 0;
err_eq:
mlx5_eq_destroy_generic(dev->mdev, eq->core);
err_wq:
+ eq->core = NULL;
destroy_workqueue(eq->wq);
err_mempool:
mempool_destroy(eq->pool);
+unlock:
+ mutex_unlock(&dev->odp_eq_mutex);
return err;
}
static int
-mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
int err;
+ if (!eq->core)
+ return 0;
mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb);
err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
cancel_work_sync(&eq->work);
@@ -1735,6 +1639,8 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret = 0;
+ internal_fill_odp_caps(dev);
+
if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
return ret;
@@ -1748,8 +1654,7 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
}
}
- ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
-
+ mutex_init(&dev->odp_eq_mutex);
return ret;
}
@@ -1758,7 +1663,7 @@ void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
return;
- mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
+ mlx5_ib_odp_destroy_eq(dev, &dev->odp_pf_eq);
}
int mlx5_ib_odp_init(void)
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index f5a52a6fae43..9282eb10bfae 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -67,7 +67,7 @@ struct mlx5_modify_raw_qp_param {
struct mlx5_rate_limit rl;
u8 rq_q_ctr_id;
- u16 port;
+ u32 port;
};
static void get_cqs(enum ib_qp_type qp_type,
@@ -3146,6 +3146,19 @@ enum {
MLX5_PATH_FLAG_COUNTER = 1 << 2,
};
+static int mlx5_to_ib_rate_map(u8 rate)
+{
+ static const int rates[] = { IB_RATE_PORT_CURRENT, IB_RATE_56_GBPS,
+ IB_RATE_25_GBPS, IB_RATE_100_GBPS,
+ IB_RATE_200_GBPS, IB_RATE_50_GBPS,
+ IB_RATE_400_GBPS };
+
+ if (rate < ARRAY_SIZE(rates))
+ return rates[rate];
+
+ return rate - MLX5_STAT_RATE_OFFSET;
+}
+
static int ib_to_mlx5_rate_map(u8 rate)
{
switch (rate) {
@@ -4485,7 +4498,7 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
rdma_ah_set_path_bits(ah_attr, MLX5_GET(ads, path, mlid));
static_rate = MLX5_GET(ads, path, stat_rate);
- rdma_ah_set_static_rate(ah_attr, static_rate ? static_rate - 5 : 0);
+ rdma_ah_set_static_rate(ah_attr, mlx5_to_ib_rate_map(static_rate));
if (MLX5_GET(ads, path, grh) ||
ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
rdma_ah_set_grh(ah_attr, NULL, MLX5_GET(ads, path, flow_label),
diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c
index 16145fda68d0..c0ddf7b3c6e2 100644
--- a/drivers/infiniband/hw/mlx5/std_types.c
+++ b/drivers/infiniband/hw/mlx5/std_types.c
@@ -7,6 +7,8 @@
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#define UVERBS_MODULE_NAME mlx5_ib
@@ -23,6 +25,174 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_PD_QUERY)(
&mpd->pdn, sizeof(mpd->pdn));
}
+static int fill_vport_icm_addr(struct mlx5_core_dev *mdev, u16 vport,
+ struct mlx5_ib_uapi_query_port *info)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+ bool sw_owner_supp;
+ u64 icm_rx;
+ u64 icm_tx;
+ int err;
+
+ sw_owner_supp = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner) ||
+ MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner_v2);
+
+ if (vport == MLX5_VPORT_UPLINK) {
+ icm_rx = MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_uplink_icm_address_rx);
+ icm_tx = MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_uplink_icm_address_tx);
+ } else {
+ MLX5_SET(query_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
+ MLX5_SET(query_esw_vport_context_in, in, other_vport, true);
+
+ err = mlx5_cmd_exec_inout(mdev, query_esw_vport_context, in,
+ out);
+
+ if (err)
+ return err;
+
+ icm_rx = MLX5_GET64(
+ query_esw_vport_context_out, out,
+ esw_vport_context.sw_steering_vport_icm_address_rx);
+
+ icm_tx = MLX5_GET64(
+ query_esw_vport_context_out, out,
+ esw_vport_context.sw_steering_vport_icm_address_tx);
+ }
+
+ if (sw_owner_supp && icm_rx) {
+ info->vport_steering_icm_rx = icm_rx;
+ info->flags |=
+ MLX5_IB_UAPI_QUERY_PORT_VPORT_STEERING_ICM_RX;
+ }
+
+ if (sw_owner_supp && icm_tx) {
+ info->vport_steering_icm_tx = icm_tx;
+ info->flags |=
+ MLX5_IB_UAPI_QUERY_PORT_VPORT_STEERING_ICM_TX;
+ }
+
+ return 0;
+}
+
+static int fill_vport_vhca_id(struct mlx5_core_dev *mdev, u16 vport,
+ struct mlx5_ib_uapi_query_port *info)
+{
+ size_t out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+ u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
+ void *out;
+ int err;
+
+ out = kzalloc(out_sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+ MLX5_SET(query_hca_cap_in, in, other_function, true);
+ MLX5_SET(query_hca_cap_in, in, function_id, vport);
+ MLX5_SET(query_hca_cap_in, in, op_mod,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE |
+ HCA_CAP_OPMOD_GET_CUR);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_sz);
+ if (err)
+ goto out;
+
+ info->vport_vhca_id = MLX5_GET(query_hca_cap_out, out,
+ capability.cmd_hca_cap.vhca_id);
+
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_VHCA_ID;
+out:
+ kfree(out);
+ return err;
+}
+
+static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_uapi_query_port *info)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_eswitch_rep *rep;
+ int err;
+
+ rep = dev->port[port_num - 1].rep;
+ if (!rep)
+ return -EOPNOTSUPP;
+
+ info->vport = rep->vport;
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT;
+
+ if (rep->vport != MLX5_VPORT_UPLINK) {
+ err = fill_vport_vhca_id(mdev, rep->vport, info);
+ if (err)
+ return err;
+ }
+
+ info->esw_owner_vhca_id = MLX5_CAP_GEN(mdev, vhca_id);
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_ESW_OWNER_VHCA_ID;
+
+ err = fill_vport_icm_addr(mdev, rep->vport, info);
+ if (err)
+ return err;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(mdev->priv.eswitch)) {
+ info->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(
+ mdev->priv.eswitch, rep->vport);
+ info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0;
+ }
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_QUERY_PORT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_uapi_query_port info = {};
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ u32 port_num;
+ int ret;
+
+ if (uverbs_copy_from(&port_num, attrs,
+ MLX5_IB_ATTR_QUERY_PORT_PORT_NUM))
+ return -EFAULT;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+
+ if (!rdma_is_port_valid(&dev->ib_dev, port_num))
+ return -EINVAL;
+
+ if (mlx5_eswitch_mode(dev->mdev) == MLX5_ESWITCH_OFFLOADS) {
+ ret = fill_switchdev_info(dev, port_num, &info);
+ if (ret)
+ return ret;
+ }
+
+ return uverbs_copy_to_struct_or_zero(attrs, MLX5_IB_ATTR_QUERY_PORT, &info,
+ sizeof(info));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_QUERY_PORT,
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_QUERY_PORT_PORT_NUM,
+ UVERBS_ATTR_TYPE(u32), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_QUERY_PORT,
+ UVERBS_ATTR_STRUCT(struct mlx5_ib_uapi_query_port,
+ reg_c0),
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_device,
+ UVERBS_OBJECT_DEVICE,
+ &UVERBS_METHOD(MLX5_IB_METHOD_QUERY_PORT));
+
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_PD_QUERY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_PD_HANDLE,
@@ -41,5 +211,8 @@ const struct uapi_definition mlx5_ib_std_types_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE(
UVERBS_OBJECT_PD,
&mlx5_ib_pd),
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_DEVICE,
+ &mlx5_ib_device),
{},
};