From c4526fe2e4090b2d2167d36da8d9f4a866114e7a Mon Sep 17 00:00:00 2001 From: Rohit Chavan Date: Sun, 19 Mar 2023 15:38:47 +0530 Subject: RDMA/mlx5: Coding style fix reported by checkpatch Block comments should align the * on each line on line 2849 Avoid line continuations in quoted strings on line 3848 Signed-off-by: Rohit Chavan Link: https://lore.kernel.org/r/20230319100847.5566-1-roheetchavan@gmail.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/qp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7cc3b973dec7..2bad38cb39fe 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2846,9 +2846,9 @@ static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag, case MLX5_QP_FLAG_SCATTER_CQE: case MLX5_QP_FLAG_ALLOW_SCATTER_CQE: /* - * We don't return error if these flags were provided, - * and mlx5 doesn't have right capability. - */ + * We don't return error if these flags were provided, + * and mlx5 doesn't have right capability. + */ *flags &= ~(MLX5_QP_FLAG_SCATTER_CQE | MLX5_QP_FLAG_ALLOW_SCATTER_CQE); return; @@ -5592,8 +5592,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) { if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && MLX5_CAP_ETH(dev->mdev, vlan_cap))) { - mlx5_ib_dbg(dev, "VLAN offloads are not " - "supported\n"); + mlx5_ib_dbg(dev, "VLAN offloads are not supported\n"); err = -EOPNOTSUPP; goto out; } -- cgit v1.2.3 From 742948cc02d5230ef81546556502a05a90f89bbb Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Sun, 19 Mar 2023 14:59:31 +0200 Subject: RDMA/mlx5: Disable out-of-order in integrity enabled QPs Set retry_mode to GO_BACK_N when qp is created with INTEGRITY_EN flag because out-of-order is not supported when doing HW offload of signature operations. 
Signed-off-by: Or Har-Toov Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/362de42cdc7a541afa5b1fd0ec6ae706061764a2.1679230449.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/qp.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7cc3b973dec7..0fdbf79a3e81 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -60,6 +60,10 @@ enum raw_qp_set_mask_map { MLX5_RAW_QP_RATE_LIMIT = 1UL << 1, }; +enum { + MLX5_QP_RM_GO_BACK_N = 0x1, +}; + struct mlx5_modify_raw_qp_param { u16 operation; @@ -2519,6 +2523,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); + if (qp->flags & IB_QP_CREATE_INTEGRITY_EN && + MLX5_CAP_GEN(mdev, go_back_n)) + MLX5_SET(qpc, qpc, retry_mode, MLX5_QP_RM_GO_BACK_N); + err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out); kvfree(in); if (err) -- cgit v1.2.3 From d22467a71ebe96ff5ab7e000dbef60d4ea76e5b0 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 23 Mar 2023 12:13:52 +0200 Subject: RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics Previously for switchdev only per device counters were supported. Currently we allocate counters for switchdev per port, which also includes the ports that belong to VF representors in order to expose them to users through the rdma tool, allowing the host to track the VFs statistics through their representors counters. 
Signed-off-by: Patrisious Haddad Link: https://lore.kernel.org/r/ea31e1103c125cd27931ba213f307cde30d2eaed.1679566038.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 171 ++++++++++++++++++++++++++++------ 1 file changed, 142 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 3e1272695d99..1c06920505d2 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -5,6 +5,7 @@ #include "mlx5_ib.h" #include +#include #include "counters.h" #include "ib_rep.h" #include "qp.h" @@ -18,6 +19,10 @@ struct mlx5_ib_counter { #define INIT_Q_COUNTER(_name) \ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)} +#define INIT_VPORT_Q_COUNTER(_name) \ + { .name = "vport_" #_name, .offset = \ + MLX5_BYTE_OFF(query_q_counter_out, _name)} + static const struct mlx5_ib_counter basic_q_cnts[] = { INIT_Q_COUNTER(rx_write_requests), INIT_Q_COUNTER(rx_read_requests), @@ -37,6 +42,25 @@ static const struct mlx5_ib_counter retrans_q_cnts[] = { INIT_Q_COUNTER(local_ack_timeout_err), }; +static const struct mlx5_ib_counter vport_basic_q_cnts[] = { + INIT_VPORT_Q_COUNTER(rx_write_requests), + INIT_VPORT_Q_COUNTER(rx_read_requests), + INIT_VPORT_Q_COUNTER(rx_atomic_requests), + INIT_VPORT_Q_COUNTER(out_of_buffer), +}; + +static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = { + INIT_VPORT_Q_COUNTER(out_of_sequence), +}; + +static const struct mlx5_ib_counter vport_retrans_q_cnts[] = { + INIT_VPORT_Q_COUNTER(duplicate_request), + INIT_VPORT_Q_COUNTER(rnr_nak_retry_err), + INIT_VPORT_Q_COUNTER(packet_seq_err), + INIT_VPORT_Q_COUNTER(implied_nak_seq_err), + INIT_VPORT_Q_COUNTER(local_ack_timeout_err), +}; + #define INIT_CONG_COUNTER(_name) \ { .name = #_name, .offset = \ MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)} @@ -67,6 +91,25 @@ static const struct 
mlx5_ib_counter roce_accl_cnts[] = { INIT_Q_COUNTER(roce_slow_restart_trans), }; +static const struct mlx5_ib_counter vport_extended_err_cnts[] = { + INIT_VPORT_Q_COUNTER(resp_local_length_error), + INIT_VPORT_Q_COUNTER(resp_cqe_error), + INIT_VPORT_Q_COUNTER(req_cqe_error), + INIT_VPORT_Q_COUNTER(req_remote_invalid_request), + INIT_VPORT_Q_COUNTER(req_remote_access_errors), + INIT_VPORT_Q_COUNTER(resp_remote_access_errors), + INIT_VPORT_Q_COUNTER(resp_cqe_flush_error), + INIT_VPORT_Q_COUNTER(req_cqe_flush_error), +}; + +static const struct mlx5_ib_counter vport_roce_accl_cnts[] = { + INIT_VPORT_Q_COUNTER(roce_adp_retrans), + INIT_VPORT_Q_COUNTER(roce_adp_retrans_to), + INIT_VPORT_Q_COUNTER(roce_slow_restart), + INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps), + INIT_VPORT_Q_COUNTER(roce_slow_restart_trans), +}; + #define INIT_EXT_PPCNT_COUNTER(_name) \ { .name = #_name, .offset = \ MLX5_BYTE_OFF(ppcnt_reg, \ @@ -153,12 +196,20 @@ static int mlx5_ib_create_counters(struct ib_counters *counters, return 0; } +static bool vport_qcounters_supported(struct mlx5_ib_dev *dev) +{ + return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) && + MLX5_CAP_GEN(dev->mdev, q_counter_aggregation); +} static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, u32 port_num) { - return is_mdev_switchdev_mode(dev->mdev) ? 
&dev->port[0].cnts : - &dev->port[port_num].cnts; + if ((is_mdev_switchdev_mode(dev->mdev) && + !vport_qcounters_supported(dev)) || !port_num) + return &dev->port[0].cnts; + + return &dev->port[port_num - 1].cnts; } /** @@ -172,7 +223,7 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, */ u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num) { - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1); return cnts->set_id; } @@ -270,12 +321,44 @@ free: return ret; } +static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev, + u32 port_num, + const struct mlx5_ib_counters *cnts, + struct rdma_hw_stats *stats) + +{ + u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; + __be32 val; + int ret, i; + + if (!dev->port[port_num].rep || + dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK) + return 0; + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, other_vport, 1); + MLX5_SET(query_q_counter_in, in, vport_number, + dev->port[port_num].rep->vport); + MLX5_SET(query_q_counter_in, in, aggregate, 1); + ret = mlx5_cmd_exec_inout(dev->mdev, query_q_counter, in, out); + if (ret) + return ret; + + for (i = 0; i < cnts->num_q_counters; i++) { + val = *(__be32 *)((void *)out + cnts->offsets[i]); + stats->value[i] = (u64)be32_to_cpu(val); + } + + return 0; +} + static int do_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port_num, int index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); struct mlx5_core_dev *mdev; int ret, num_counters; @@ -286,11 +369,19 @@ static int do_get_hw_stats(struct ib_device *ibdev, cnts->num_cong_counters + cnts->num_ext_ppcnt_counters; - /* q_counters are 
per IB device, query the master mdev */ - ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); + if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0) + ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts, + stats); + else + ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, + cnts->set_id); if (ret) return ret; + /* We don't expose device counters over Vports */ + if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0) + goto done; + if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); if (ret) @@ -335,7 +426,8 @@ static int do_get_op_stat(struct ib_device *ibdev, u32 type; int ret; - cnts = get_counters(dev, port_num - 1); + cnts = get_counters(dev, port_num); + opfcs = cnts->opfcs; type = *(u32 *)cnts->descs[index].priv; if (type >= MLX5_IB_OPCOUNTER_MAX) @@ -362,7 +454,7 @@ static int do_get_op_stats(struct ib_device *ibdev, const struct mlx5_ib_counters *cnts; int index, ret, num_hw_counters; - cnts = get_counters(dev, port_num - 1); + cnts = get_counters(dev, port_num); num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + cnts->num_ext_ppcnt_counters; for (index = num_hw_counters; @@ -383,7 +475,7 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); const struct mlx5_ib_counters *cnts; - cnts = get_counters(dev, port_num - 1); + cnts = get_counters(dev, port_num); num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters + cnts->num_ext_ppcnt_counters; num_counters = num_hw_counters + cnts->num_op_counters; @@ -410,8 +502,7 @@ static struct rdma_hw_stats * mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) { struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); + const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port); return do_alloc_stats(cnts); } @@ -419,8 +510,7 @@ 
mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) { struct mlx5_ib_dev *dev = to_mdev(counter->device); - const struct mlx5_ib_counters *cnts = - get_counters(dev, counter->port - 1); + const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port); return mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats, counter->id); @@ -479,44 +569,55 @@ static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp) } static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, - struct rdma_stat_desc *descs, size_t *offsets) + struct rdma_stat_desc *descs, size_t *offsets, + u32 port_num) { - int i; - int j = 0; + bool is_vport = is_mdev_switchdev_mode(dev->mdev) && + port_num != MLX5_VPORT_PF; + const struct mlx5_ib_counter *names; + int j = 0, i; + names = is_vport ? vport_basic_q_cnts : basic_q_cnts; for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { - descs[j].name = basic_q_cnts[i].name; + descs[j].name = names[i].name; offsets[j] = basic_q_cnts[i].offset; } + names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts; if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { - descs[j].name = out_of_seq_q_cnts[i].name; + descs[j].name = names[i].name; offsets[j] = out_of_seq_q_cnts[i].offset; } } + names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts; if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { - descs[j].name = retrans_q_cnts[i].name; + descs[j].name = names[i].name; offsets[j] = retrans_q_cnts[i].offset; } } + names = is_vport ? vport_extended_err_cnts : extended_err_cnts; if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { - descs[j].name = extended_err_cnts[i].name; + descs[j].name = names[i].name; offsets[j] = extended_err_cnts[i].offset; } } + names = is_vport ? 
vport_roce_accl_cnts : roce_accl_cnts; if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { - descs[j].name = roce_accl_cnts[i].name; + descs[j].name = names[i].name; offsets[j] = roce_accl_cnts[i].offset; } } + if (is_vport) + return; + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { descs[j].name = cong_cnts[i].name; @@ -558,9 +659,9 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, - struct mlx5_ib_counters *cnts) + struct mlx5_ib_counters *cnts, u32 port_num) { - u32 num_counters, num_op_counters; + u32 num_counters, num_op_counters = 0; num_counters = ARRAY_SIZE(basic_q_cnts); @@ -578,6 +679,9 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_q_counters = num_counters; + if (is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF) + goto skip_non_qcounters; + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); num_counters += ARRAY_SIZE(cong_cnts); @@ -597,6 +701,7 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, ft_field_support_2_nic_transmit_rdma.bth_opcode)) num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts); +skip_non_qcounters: cnts->num_op_counters = num_op_counters; num_counters += num_op_counters; cnts->descs = kcalloc(num_counters, @@ -623,7 +728,8 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) int num_cnt_ports; int i, j; - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; + num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) || + vport_qcounters_supported(dev)) ? 
dev->num_ports : 1; MLX5_SET(dealloc_q_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); @@ -662,15 +768,16 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; - num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; + num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) || + vport_qcounters_supported(dev)) ? dev->num_ports : 1; for (i = 0; i < num_cnt_ports; i++) { - err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); + err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i); if (err) goto err_alloc; mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs, - dev->port[i].cnts.offsets); + dev->port[i].cnts.offsets, i); MLX5_SET(alloc_q_counter_in, in, uid, is_shared ? MLX5_SHARED_RESOURCE_UID : 0); @@ -889,6 +996,10 @@ static const struct ib_device_ops hw_stats_ops = { mlx5_ib_modify_stat : NULL, }; +static const struct ib_device_ops hw_switchdev_vport_op = { + .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats, +}; + static const struct ib_device_ops hw_switchdev_stats_ops = { .alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats, .get_hw_stats = mlx5_ib_get_hw_stats, @@ -914,9 +1025,11 @@ int mlx5_ib_counters_init(struct mlx5_ib_dev *dev) if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) return 0; - if (is_mdev_switchdev_mode(dev->mdev)) + if (is_mdev_switchdev_mode(dev->mdev)) { ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops); - else + if (vport_qcounters_supported(dev)) + ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op); + } else ib_set_device_ops(&dev->ib_dev, &hw_stats_ops); return mlx5_ib_alloc_counters(dev); } -- cgit v1.2.3 From 081c27b3bcdbfad6fa3c16975e02e33073f1267d Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 30 Mar 2023 11:36:07 -0400 Subject: RDMA/mlx5: Remove unused num_alloc_xa_entries variable clang with W=1 reports drivers/infiniband/hw/mlx5/devx.c:1996:6: error: variable 
'num_alloc_xa_entries' set but not used [-Werror,-Wunused-but-set-variable] int num_alloc_xa_entries = 0; ^ This variable is not used so remove it. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20230330153607.1838750-1-trix@redhat.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/devx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2211a0be16f3..07037b829c7e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1993,7 +1993,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( int redirect_fd; bool use_eventfd = false; int num_events; - int num_alloc_xa_entries = 0; u16 obj_type = 0; u64 cookie = 0; u32 obj_id = 0; @@ -2075,7 +2074,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( if (err) goto err; - num_alloc_xa_entries++; event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL); if (!event_sub) { err = -ENOMEM; -- cgit v1.2.3 From ed4b0661cce119870edb1994fd06c9cbc1dc05c3 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Mon, 10 Apr 2023 16:07:50 +0300 Subject: RDMA/mlx5: Remove pcie_relaxed_ordering_enabled() check for RO write pcie_relaxed_ordering_enabled() check was added to avoid a syndrome when creating a MKey with relaxed ordering (RO) enabled when the driver's relaxed_ordering_{read,write} HCA capabilities are out of sync with FW. While this can happen with relaxed_ordering_read, it can't happen with relaxed_ordering_write as it's set if the device supports RO write, regardless of RO in PCI config space, and thus can't change during runtime. Therefore, drop the pcie_relaxed_ordering_enabled() check for relaxed_ordering_write while keeping it for relaxed_ordering_read. Doing so will also allow the usage of RO write in VFs and VMs (where RO in PCI config space is not reported/emulated properly). 
Signed-off-by: Avihai Horon Reviewed-by: Shay Drory Link: https://lore.kernel.org/r/7e8f55e31572c1702d69cae015a395d3a824a38a.1681131553.git.leon@kernel.org Reviewed-by: Jacob Keller Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 67356f515261..bd0a818ba1cd 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -67,11 +67,11 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); - if ((acc & IB_ACCESS_RELAXED_ORDERING) && - pcie_relaxed_ordering_enabled(dev->mdev->pdev)) { + if (acc & IB_ACCESS_RELAXED_ORDERING) { if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + pcie_relaxed_ordering_enabled(dev->mdev->pdev)) MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); } -- cgit v1.2.3 From d43b020b0f82c088ef8ff3196ef00575a97d200e Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Mon, 10 Apr 2023 16:07:51 +0300 Subject: RDMA/mlx5: Check pcie_relaxed_ordering_enabled() in UMR relaxed_ordering_read HCA capability is set if both the device supports relaxed ordering (RO) read and RO is set in PCI config space. RO in PCI config space can change during runtime. This will change the value of relaxed_ordering_read HCA capability in FW, but the driver will not see it since it queries the capabilities only once. This can lead to the following scenario: 1. RO in PCI config space is enabled. 2. User creates MKey without RO. 3. RO in PCI config space is disabled. As a result, relaxed_ordering_read HCA capability is turned off in FW but remains on in driver copy of the capabilities. 4. 
User requests to reconfig the MKey with RO via UMR. 5. Driver will try to reconfig the MKey with RO read although it shouldn't (as relaxed_ordering_read HCA capability is really off). To fix this, check pcie_relaxed_ordering_enabled() before setting RO read in UMR. Fixes: 896ec9735336 ("RDMA/mlx5: Set mkey relaxed ordering by UMR with ConnectX-7") Signed-off-by: Avihai Horon Reviewed-by: Shay Drory Link: https://lore.kernel.org/r/8d39eb8317e7bed1a354311a20ae707788fd94ed.1681131553.git.leon@kernel.org Reviewed-by: Jacob Keller Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/umr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 55f4e048d947..c9e176e8ced4 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -380,6 +380,9 @@ static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev, struct mlx5_mkey_seg *seg, unsigned int access_flags) { + bool ro_read = (access_flags & IB_ACCESS_RELAXED_ORDERING) && + pcie_relaxed_ordering_enabled(dev->mdev->pdev); + MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); @@ -387,8 +390,7 @@ static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev, MLX5_SET(mkc, seg, lr, 1); MLX5_SET(mkc, seg, relaxed_ordering_write, !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); - MLX5_SET(mkc, seg, relaxed_ordering_read, - !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); + MLX5_SET(mkc, seg, relaxed_ordering_read, ro_read); } int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, -- cgit v1.2.3 From ccbbfe0682f2fff1e157413c30092dd27c50e20e Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Mon, 10 Apr 2023 16:07:52 +0300 Subject: net/mlx5: Update relaxed ordering read HCA capabilities Rename 
existing HCA capability relaxed_ordering_read to relaxed_ordering_read_pci_enabled. This is in accordance with recent PRM change to better describe the capability, as it's set only if both the device supports relaxed ordering (RO) read and RO is enabled in PCI config space. In addition, add new HCA capability relaxed_ordering_read which is set if the device supports RO read, regardless of RO in PCI config space. This will be used in the following patch to allow RO in VFs and VMs. Signed-off-by: Avihai Horon Reviewed-by: Shay Drory Link: https://lore.kernel.org/r/caa0002fd8135086357dfcc368e2f5cc73b08480.1681131553.git.leon@kernel.org Reviewed-by: Jacob Keller Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 5 +++-- drivers/infiniband/hw/mlx5/umr.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bd0a818ba1cd..6a3a8e00bfaa 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -70,7 +70,8 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, if (acc & IB_ACCESS_RELAXED_ORDERING) { if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + if (MLX5_CAP_GEN(dev->mdev, + relaxed_ordering_read_pci_enabled) && pcie_relaxed_ordering_enabled(dev->mdev->pdev)) MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); } @@ -791,7 +792,7 @@ static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, ret |= IB_ACCESS_RELAXED_ORDERING; if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled) && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) ret |= IB_ACCESS_RELAXED_ORDERING; diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h index 
c9d0021381a2..e12ecd7e079c 100644 --- a/drivers/infiniband/hw/mlx5/umr.h +++ b/drivers/infiniband/hw/mlx5/umr.h @@ -62,7 +62,7 @@ static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev, return false; if ((diffs & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled) && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return false; -- cgit v1.2.3 From bd4ba605c4a92b46ab414626a4f969a19103f97a Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Mon, 10 Apr 2023 16:07:53 +0300 Subject: RDMA/mlx5: Allow relaxed ordering read in VFs and VMs According to PCIe spec, Enable Relaxed Ordering value in the VF's PCI config space is wired to 0 and PF relaxed ordering (RO) setting should be applied to the VF. In QEMU (and maybe others), when assigning VFs, the RO bit in PCI config space is not emulated properly and is always set to 0. Therefore, pcie_relaxed_ordering_enabled() always returns 0 for VFs and VMs and thus MKeys can't be created with RO read even if the PF supports it. pcie_relaxed_ordering_enabled() check was added to avoid a syndrome when creating a MKey with relaxed ordering (RO) enabled when the driver's relaxed_ordering_read_pci_enabled HCA capability is out of sync with FW. With the new relaxed_ordering_read capability this can't happen, as it's set regardless of RO value in PCI config space and thus can't change during runtime. Hence, to allow RO read in VFs and VMs, use the new HCA capability relaxed_ordering_read without checking pcie_relaxed_ordering_enabled(). The old capability checks are kept for backward compatibility with older FWs. Allowing RO in VFs and VMs is valuable since it can greatly improve performance on some setups. For example, testing throughput of a VF on an AMD EPYC 7763 and ConnectX-6 Dx setup showed roughly 60% performance improvement. 
Signed-off-by: Avihai Horon Reviewed-by: Shay Drory Reviewed-by: Aya Levin Link: https://lore.kernel.org/r/e7048640d66c341a8fa0465e099926e7989184bc.1681131553.git.leon@kernel.org Reviewed-by: Jacob Keller Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 11 +++++++---- drivers/infiniband/hw/mlx5/umr.c | 3 ++- drivers/infiniband/hw/mlx5/umr.h | 3 ++- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6a3a8e00bfaa..2017ede100a6 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -70,9 +70,11 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, if (acc & IB_ACCESS_RELAXED_ORDERING) { if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); - if (MLX5_CAP_GEN(dev->mdev, - relaxed_ordering_read_pci_enabled) && - pcie_relaxed_ordering_enabled(dev->mdev->pdev)) + + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || + (MLX5_CAP_GEN(dev->mdev, + relaxed_ordering_read_pci_enabled) && + pcie_relaxed_ordering_enabled(dev->mdev->pdev))) MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); } @@ -792,7 +794,8 @@ static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, ret |= IB_ACCESS_RELAXED_ORDERING; if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled) && + (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) ret |= IB_ACCESS_RELAXED_ORDERING; diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index c9e176e8ced4..234bf30db731 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -381,7 +381,8 @@ static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev, unsigned int access_flags) { bool 
ro_read = (access_flags & IB_ACCESS_RELAXED_ORDERING) && - pcie_relaxed_ordering_enabled(dev->mdev->pdev); + (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || + pcie_relaxed_ordering_enabled(dev->mdev->pdev)); MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h index e12ecd7e079c..3799bb758e49 100644 --- a/drivers/infiniband/hw/mlx5/umr.h +++ b/drivers/infiniband/hw/mlx5/umr.h @@ -62,7 +62,8 @@ static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev, return false; if ((diffs & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled) && + (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) || + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return false; -- cgit v1.2.3 From 3e358ea8614ddfbc59ca7a3f5dff5dde2b350b2c Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 13 Apr 2023 12:23:09 +0300 Subject: RDMA/mlx5: Fix flow counter query via DEVX Commit cited in "fixes" tag added bulk support for flow counters but it didn't account for the fact that it's also possible to query a counter using a non-base id if the counter was allocated as bulk. When a user performs a query, validate that the flow counter id given in the mailbox is inside the valid range, taking the bulk value into account. 
Fixes: 208d70f562e5 ("IB/mlx5: Support flow counters offset for bulk counters") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Link: https://lore.kernel.org/r/79d7fbe291690128e44672418934256254d93115.1681377114.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/devx.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 07037b829c7e..db5fb196c728 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -666,7 +666,21 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs, obj_id; case MLX5_IB_OBJECT_DEVX_OBJ: - return ((struct devx_obj *)uobj->object)->obj_id == obj_id; + { + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + struct devx_obj *devx_uobj = uobj->object; + + if (opcode == MLX5_CMD_OP_QUERY_FLOW_COUNTER && + devx_uobj->flow_counter_bulk_size) { + u64 end; + + end = devx_uobj->obj_id + + devx_uobj->flow_counter_bulk_size; + return devx_uobj->obj_id <= obj_id && end > obj_id; + } + + return devx_uobj->obj_id == obj_id; + } default: return false; @@ -1517,10 +1531,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( goto obj_free; if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) { - u8 bulk = MLX5_GET(alloc_flow_counter_in, - cmd_in, - flow_counter_bulk); - obj->flow_counter_bulk_size = 128UL * bulk; + u32 bulk = MLX5_GET(alloc_flow_counter_in, + cmd_in, + flow_counter_bulk_log_size); + + if (bulk) + bulk = 1 << bulk; + else + bulk = 128UL * MLX5_GET(alloc_flow_counter_in, + cmd_in, + flow_counter_bulk); + obj->flow_counter_bulk_size = bulk; } uobj->object = obj; -- cgit v1.2.3 From 746aa3c8cb1a650ff2583497ac646e505831b9b9 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 20 Apr 2023 04:39:06 +0300 Subject: RDMA/mlx5: Use correct device num_ports when modify DC Just like other QP types, 
when modifying DC, the port_num should be compared with dev->num_ports instead of HCA_CAP.num_ports. Otherwise a multi-port vHCA on DC may not work. Fixes: 776a3906b692 ("IB/mlx5: Add support for DC target QP") Link: https://lore.kernel.org/r/20230420013906.1244185-1-markzhang@nvidia.com Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/mlx5') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 1093d3a0ed43..70ca8ffa9256 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4493,7 +4493,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, return -EINVAL; if (attr->port_num == 0 || - attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)) { + attr->port_num > dev->num_ports) { mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n", attr->port_num, dev->num_ports); return -EINVAL; -- cgit v1.2.3