diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 287 |
1 files changed, 259 insertions, 28 deletions
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 997cbfe4b90c..e874d688d040 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -40,7 +40,7 @@ #include <linux/slab.h> #include <linux/bitmap.h> #if defined(CONFIG_X86) -#include <asm/pat.h> +#include <asm/memtype.h> #endif #include <linux/sched.h> #include <linux/sched/mm.h> @@ -815,6 +815,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { + size_t uhw_outlen = (uhw) ? uhw->outlen : 0; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; @@ -828,12 +829,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, u64 max_tso; resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); - if (uhw->outlen && uhw->outlen < resp_len) + if (uhw_outlen && uhw_outlen < resp_len) return -EINVAL; resp.response_length = resp_len; - if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) + if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; memset(props, 0, sizeof(*props)); @@ -897,7 +898,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->raw_packet_caps |= IB_RAW_PACKET_CAP_CVLAN_STRIPPING; - if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { + if (field_avail(typeof(resp), tso_caps, uhw_outlen)) { max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); if (max_tso) { resp.tso_caps.max_tso = 1 << max_tso; @@ -907,7 +908,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { + if (field_avail(typeof(resp), rss_caps, uhw_outlen)) { resp.rss_caps.rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ; resp.rss_caps.rx_hash_fields_mask = @@ -927,9 +928,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.rss_caps); } } else { - if (field_avail(typeof(resp), tso_caps, uhw->outlen)) + if (field_avail(typeof(resp), tso_caps, uhw_outlen)) resp.response_length += sizeof(resp.tso_caps); - if (field_avail(typeof(resp), rss_caps, uhw->outlen)) + if (field_avail(typeof(resp), rss_caps, uhw_outlen)) resp.response_length += sizeof(resp.rss_caps); } @@ -1014,6 +1015,23 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; + if (!uhw) { + /* ODP for kernel QPs is not implemented for receive + * WQEs and SRQ WQEs + */ + props->odp_caps.per_transport_caps.rc_odp_caps &= + ~(IB_ODP_SUPPORT_READ | + IB_ODP_SUPPORT_SRQ_RECV); + props->odp_caps.per_transport_caps.uc_odp_caps &= + ~(IB_ODP_SUPPORT_READ | + IB_ODP_SUPPORT_SRQ_RECV); + props->odp_caps.per_transport_caps.ud_odp_caps &= + ~(IB_ODP_SUPPORT_READ | + IB_ODP_SUPPORT_SRQ_RECV); + props->odp_caps.per_transport_caps.xrc_odp_caps &= + ~(IB_ODP_SUPPORT_READ | + IB_ODP_SUPPORT_SRQ_RECV); + } } if (MLX5_CAP_GEN(mdev, cd)) @@ -1054,7 +1072,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MAX_CQ_PERIOD; } - if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { + if (field_avail(typeof(resp), cqe_comp_caps, uhw_outlen)) { resp.response_length += sizeof(resp.cqe_comp_caps); if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) { @@ -1072,7 +1090,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) && + if (field_avail(typeof(resp), packet_pacing_caps, uhw_outlen) && raw_support) { if (MLX5_CAP_QOS(mdev, packet_pacing) && MLX5_CAP_GEN(mdev, qos)) { @@ -1091,7 +1109,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, - uhw->outlen)) { + uhw_outlen)) { if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; @@ -1104,7 +1122,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } - if (field_avail(typeof(resp), flags, uhw->outlen)) { + if (field_avail(typeof(resp), flags, uhw_outlen)) { resp.response_length += sizeof(resp.flags); if (MLX5_CAP_GEN(mdev, cqe_compression_128)) @@ -1120,8 +1138,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } - if (field_avail(typeof(resp), sw_parsing_caps, - uhw->outlen)) { + if (field_avail(typeof(resp), sw_parsing_caps, uhw_outlen)) { resp.response_length += sizeof(resp.sw_parsing_caps); if (MLX5_CAP_ETH(mdev, swp)) { resp.sw_parsing_caps.sw_parsing_offloads |= @@ -1141,7 +1158,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) && + if (field_avail(typeof(resp), striding_rq_caps, uhw_outlen) && raw_support) { resp.response_length += sizeof(resp.striding_rq_caps); if (MLX5_CAP_GEN(mdev, striding_rq)) { @@ -1164,8 +1181,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), tunnel_offloads_caps, - uhw->outlen)) { + if (field_avail(typeof(resp), tunnel_offloads_caps, uhw_outlen)) { resp.response_length += sizeof(resp.tunnel_offloads_caps); if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) resp.tunnel_offloads_caps |= @@ -1186,7 +1202,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } - if (uhw->outlen) { + if (uhw_outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); if (err) @@ -2078,6 +2094,7 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) { struct mlx5_user_mmap_entry *mentry = to_mmmap(entry); struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device); + struct mlx5_var_table *var_table = &dev->var_table; struct mlx5_ib_dm *mdm; switch (mentry->mmap_flag) { @@ -2087,6 +2104,12 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) mdm->size); kfree(mdm); break; + case MLX5_IB_MMAP_TYPE_VAR: + mutex_lock(&var_table->bitmap_lock); + clear_bit(mentry->page_idx, var_table->bitmap); + mutex_unlock(&var_table->bitmap_lock); + kfree(mentry); + break; default: WARN_ON(true); } @@ -2246,7 +2269,10 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, mentry = to_mmmap(entry); pfn = (mentry->address >> PAGE_SHIFT); - prot = pgprot_writecombine(vma->vm_page_prot); + if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR) + prot = pgprot_noncached(vma->vm_page_prot); + else + prot = pgprot_writecombine(vma->vm_page_prot); ret = rdma_user_mmap_io(ucontext, vma, pfn, entry->npages * PAGE_SIZE, prot, @@ -2255,6 +2281,15 @@ static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, return ret; } +static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry) +{ + u16 cmd = entry->rdma_entry.start_pgoff >> 16; + u16 index = entry->rdma_entry.start_pgoff & 0xFFFF; + + return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) | + (index & 0xFF)) << PAGE_SHIFT; +} + static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) { struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); @@ -3276,12 +3311,14 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, int num_entries, int num_groups, u32 flags) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; - ft = mlx5_create_auto_grouped_flow_table(ns, priority, - num_entries, - num_groups, - 0, flags); + ft_attr.prio = priority; + ft_attr.max_fte = num_entries; + ft_attr.flags = flags; + ft_attr.autogroup.max_num_groups = num_groups; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(ft)) return ERR_CAST(ft); @@ -4771,7 +4808,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; int err = -ENOMEM; - struct ib_udata uhw = {.inlen = 0, .outlen = 0}; pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) @@ -4781,7 +4817,7 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) if (!dprops) goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); + err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); goto out; @@ -5351,6 +5387,14 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(req_cqe_flush_error), }; +static const struct mlx5_ib_counter roce_accl_cnts[] = { + INIT_Q_COUNTER(roce_adp_retrans), + INIT_Q_COUNTER(roce_adp_retrans_to), + INIT_Q_COUNTER(roce_slow_restart), + INIT_Q_COUNTER(roce_slow_restart_cnps), + INIT_Q_COUNTER(roce_slow_restart_trans), +}; + #define INIT_EXT_PPCNT_COUNTER(_name) \ { .name = #_name, .offset = \ MLX5_BYTE_OFF(ppcnt_reg, \ @@ -5399,6 +5443,9 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) num_counters += ARRAY_SIZE(extended_err_cnts); + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) + num_counters += ARRAY_SIZE(roce_accl_cnts); + cnts->num_q_counters = num_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { @@ -5459,6 +5506,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, } } + if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { + for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { + names[j] = roce_accl_cnts[i].name; + offsets[j] = roce_accl_cnts[i].offset; + } + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { names[j] = cong_cnts[i].name; @@ -6034,6 +6088,145 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } +static int var_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_user_mmap_entry *obj = uobject->object; + + rdma_user_mmap_entry_remove(&obj->rdma_entry); + return 0; +} + +static struct mlx5_user_mmap_entry * +alloc_var_entry(struct mlx5_ib_ucontext *c) +{ + struct mlx5_user_mmap_entry *entry; + struct mlx5_var_table *var_table; + u32 page_idx; + int err; + + var_table = &to_mdev(c->ibucontext.device)->var_table; + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + mutex_lock(&var_table->bitmap_lock); + page_idx = find_first_zero_bit(var_table->bitmap, + var_table->num_var_hw_entries); + if (page_idx >= var_table->num_var_hw_entries) { + err = -ENOSPC; + mutex_unlock(&var_table->bitmap_lock); + goto end; + } + + set_bit(page_idx, var_table->bitmap); + mutex_unlock(&var_table->bitmap_lock); + + entry->address = var_table->hw_start_addr + + (page_idx * var_table->stride_size); + entry->page_idx = page_idx; + entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR; + + err = rdma_user_mmap_entry_insert_range( + &c->ibucontext, &entry->rdma_entry, var_table->stride_size, + MLX5_IB_MMAP_OFFSET_START << 16, + (MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1); + if (err) + goto err_insert; + + return entry; + +err_insert: + mutex_lock(&var_table->bitmap_lock); + clear_bit(page_idx, var_table->bitmap); + mutex_unlock(&var_table->bitmap_lock); +end: + kfree(entry); + return ERR_PTR(err); +} + +static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uobject *uobj = uverbs_attr_get_uobject( + attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE); + struct mlx5_ib_ucontext *c; + struct mlx5_user_mmap_entry *entry; + u64 mmap_offset; + u32 length; + int err; + + c = to_mucontext(ib_uverbs_get_ucontext(attrs)); + if (IS_ERR(c)) + return PTR_ERR(c); + + entry = alloc_var_entry(c); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + mmap_offset = mlx5_entry_to_mmap_offset(entry); + length = entry->rdma_entry.npages * PAGE_SIZE; + uobj->object = entry; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, + &mmap_offset, sizeof(mmap_offset)); + if (err) + goto err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, + &entry->page_idx, sizeof(entry->page_idx)); + if (err) + goto err; + + err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, + &length, sizeof(length)); + if (err) + goto err; + + return 0; + +err: + rdma_user_mmap_entry_remove(&entry->rdma_entry); + return err; +} + +DECLARE_UVERBS_NAMED_METHOD( + MLX5_IB_METHOD_VAR_OBJ_ALLOC, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE, + MLX5_IB_OBJECT_VAR, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY( + MLX5_IB_METHOD_VAR_OBJ_DESTROY, + UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE, + MLX5_IB_OBJECT_VAR, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR, + UVERBS_TYPE_ALLOC_IDR(var_obj_cleanup), + &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC), + &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY)); + +static bool var_is_supported(struct ib_device *device) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + + return (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q); +} + ADD_UVERBS_ATTRIBUTES_SIMPLE( mlx5_ib_dm, UVERBS_OBJECT_DM, @@ -6056,14 +6249,14 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( enum mlx5_ib_uapi_flow_action_flags)); static const struct uapi_definition mlx5_ib_defs[] = { -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN(mlx5_ib_devx_defs), UAPI_DEF_CHAIN(mlx5_ib_flow_defs), -#endif UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_action), UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, + UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), {} }; @@ -6335,6 +6528,35 @@ static const struct ib_device_ops mlx5_ib_dev_dm_ops = { .reg_dm_mr = mlx5_ib_reg_dm_mr, }; +static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_var_table *var_table = &dev->var_table; + u8 log_doorbell_bar_size; + u8 log_doorbell_stride; + u64 bar_size; + + log_doorbell_bar_size = MLX5_CAP_DEV_VDPA_EMULATION(mdev, + log_doorbell_bar_size); + log_doorbell_stride = MLX5_CAP_DEV_VDPA_EMULATION(mdev, + log_doorbell_stride); + var_table->hw_start_addr = dev->mdev->bar_addr + + MLX5_CAP64_DEV_VDPA_EMULATION(mdev, + doorbell_bar_offset); + bar_size = (1ULL << log_doorbell_bar_size) * 4096; + var_table->stride_size = 1ULL << log_doorbell_stride; + var_table->num_var_hw_entries = bar_size / var_table->stride_size; + mutex_init(&var_table->bitmap_lock); + var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries, + GFP_KERNEL); + return (var_table->bitmap) ? 0 : -ENOMEM; +} + +static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev) +{ + bitmap_free(dev->var_table.bitmap); +} + static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -6422,6 +6644,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) mutex_init(&dev->lb.mutex); + if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_ib_init_var_table(dev); + if (err) + return err; + } + dev->ib_dev.use_cq_dim = true; return 0; @@ -6725,6 +6954,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) { + dev->ib_active = false; + /* Number of stages to cleanup */ while (stage) { stage--; @@ -6770,7 +7001,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_flow_db_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, - NULL), + mlx5_ib_stage_caps_cleanup), STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, mlx5_ib_stage_non_default_cb, NULL), @@ -6827,7 +7058,7 @@ const struct mlx5_ib_profile raw_eth_profile = { mlx5_ib_stage_flow_db_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, - NULL), + mlx5_ib_stage_caps_cleanup), STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, mlx5_ib_stage_raw_eth_non_default_cb, NULL), |